Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-11-23 21:26:02 +00:00)
Compare commits
281 Commits
643dd33358 40e60c6f52 eeae6383d0 8cbcaeff56 ce87d5a89e a9d6e86077 346f9efe3a a987d698c1
dbb45dec1a 5f69a43846 fe1e4814fa c29749741b 3e47201365 ec9719f3b1 b2cc9e4db8 56198bae48
888059b2d0 410f2fc8c3 54e244d2f3 e0c36972fb daadcddb5e 7f92dafa02 cc5d12a368 0f98b996b5
d005ca5bf7 7e65fb1d3e cdefb3f665 a91887221a 9c66b20a97 a48283527e 73f78c19b0 34639e346e
7af2a254d6 0f9d262a1c 34765b556b dfb4860578 ce62713f02 8b5d04d60f 1b74709b91 a5c0a282c5
4fc048ff20 375b5600cd 32b997d817 ff3090e3cc 6c6645f945 af6473d999 11851f9701 cc4654eabd
0bb91f4a77 f9d57f54df 3ef1afc0f1 dbb5abebb6 700f33bd39 d01bbbccde 4fc506f267 dc456276e5
b2ea50cb10 5074cf92ab a92bc8d192 ee538cf045 2b05d63a0f 104e8918ce d6ec4d4f4a f0e7326b7a
c8106a0006 c9ab5bc0b6 5e0f15fd43 4c30f090c7 63f247cdda e109fa9529 76e4ec2168 982babdb74
7ae2ae33d9 cb0788ae07 cb3e5dc234 59d40a2821 98a678e73d 70292aae3c 73521f0069 4533179604
1a21cc1a17 d08042f8a7 77aadb5f22 4fd913f7eb 8618a4d2ba e9c5df7993 8a28b3aa77 1a0b100ad9
ff93563f41 2f25258191 2859079c32 74b83d305f 70f6e4b828 6df196034e a63762737c 77394bd4b9
cb87201c8b 1a9c38794f 34233efb63 af0608ebd6 8c7e5c094e c064737137 1d188a7ad3 66a6b65716
326652a399 59316e8d5a 76d7f20c87 380b2797a5 1dd58f9bec ddc76ad0dc ffacf1c002 5a49b93b77
918a6eaec9 1e6ce70e3e b418054ee4 58f30e9d8a c45172a4bf 221ba20083 93c5fbbb8b 22d529523a
ed6f479940 f19f712433 24a92c2809 443cc24408 e8d5228250 5c33fb090c 48dd9146e7 c1c42e818e
519905ef9c f242377d2b da06306274 b93b803a2e cf43ec4aff 9795d98e77 316b4c047f 1d701c6980
0203adb9cb 0d05c2ad6e b3f44c4abd 62115f57b1 9023172139 59631afd9a c2584c6edd 685663af3c
72b4b41443 70aa768d48 6029677eec 3c78f4121e 89170dd78f 6379a62d95 4c05c0cf96 ce832da16c
14de657d36 9a36c090bf 3aca010b42 62c11ce3f3 f358538f4f 9068857ba1 d241157084 69f73b1d74
202794f620 38cbd54604 3877e0043c f95398420b 53905c1362 113aac8815 d2071dde1f 4502af5aed
06af68aa07 6d378c6397 ec0c0cf779 851694e323 ea92c64fdc dc36f681be 48f1987a8d b98e2cef81
9f79ce82af 5f18a9b2ee 7f8a1ac0be 1a67163ee8 38141de68d 7a98b80687 229a12c8e6 2fdfe79400
9184b12a26 742378d8e1 6dcd739a8b f97384da6c 6ea76f2771 715b255371 db094d3923 c29bdcae23
75219181a3 a5b5cf7cd1 142ba8ea00 4bc823e07c db06ca7138 95595a768e 36f649768e 0c6fc243f2
dfc46d5627 11d55f2121 014da57cf6 70a0ff4a8f dd0d5e4b90 15b3bb1700 077ec2ab11 f25db0795e
c50a337c29 efeae09ce1 ad55b48664 94eabd34e6 6935589f74 4beb452027 b722da303a ad39263b94
0ffb08b112 ff80b4d0ff 7fb4404928 8405f0bf9c 3a7f9b56fe 61034e2e2e 108d6d3344 35bd00f6a1
69059d67ef e13783103f f719665c4e 638f284614 32ac98ed95 46aee695ca 716c67f858 fec10bb2d6
3dac2cf73e 03eca800e6 28fa2e960e a3b9220f84 c09d48edf2 ae4ab0ebbb 900a9a6d59 fc560e6730
e2a06470b7 ada27323f2 607a1c2395 b56956ea0c 3d21290f7f 4edd4c06bc 566baddc6b febe3186ce
5dd42c1871 8670793e6e 41a04aa3ab 88f841bc05 d19892d2ea c0905d6650 576d7d94b1 f4f1334b62
aaff6c3685 42d2af4c84 6be91c824c 6ee0537db8 3fbeff4308 375546b61a 25a1d50763 6f0d26c22c
4fe073cc1a 5cd3d36d20 9f4dcd04e9 d7ad76ea1e e82bb93221 000cb93aad ad4f5514b9 8d29a29867
d7de819d11 7a6cf30cb2 e43d67591c 134237d1eb 26d9070aa7 f9ffb8ada5 a47888f02c 5bef2f4d86
d52c7dcc94
1 .github/dependabot.yml vendored
@@ -7,6 +7,5 @@ updates:
     schedule:
       interval: "monthly"
     labels:
       - 'skip changelog'
       - 'dependencies'
-    rebase-strategy: disabled
33 .github/release-draft-template.yml vendored
@@ -1,33 +0,0 @@
-name-template: 'v$RESOLVED_VERSION'
-tag-template: 'v$RESOLVED_VERSION'
-exclude-labels:
-  - 'skip changelog'
-version-resolver:
-  minor:
-    labels:
-      - 'enhancement'
-  default: patch
-categories:
-  - title: '⚠️ Breaking changes'
-    label: 'breaking-change'
-  - title: '🚀 Enhancements'
-    label: 'enhancement'
-  - title: '🐛 Bug Fixes'
-    label: 'bug'
-  - title: '🔒 Security'
-    label: 'security'
-  - title: '⚙️ Maintenance/misc'
-    label:
-      - 'maintenance'
-      - 'documentation'
-template: |
-  $CHANGES
-
-  ❤️ Huge thanks to our contributors: $CONTRIBUTORS.
-no-changes-template: 'Changes are coming soon 😎'
-sort-direction: 'ascending'
-replacers:
-  - search: '/(?:and )?@dependabot-preview(?:\[bot\])?,?/g'
-    replace: ''
-  - search: '/(?:and )?@dependabot(?:\[bot\])?,?/g'
-    replace: ''
2 .github/workflows/bench-manual.yml vendored
@@ -18,7 +18,7 @@ jobs:
     timeout-minutes: 180 # 3h
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
2 .github/workflows/bench-pr.yml vendored
@@ -66,7 +66,7 @@ jobs:
           fetch-depth: 0 # fetch full history to be able to get main commit sha
           ref: ${{ steps.comment-branch.outputs.head_ref }}

-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
2 .github/workflows/bench-push-indexing.yml vendored
@@ -12,7 +12,7 @@ jobs:
     timeout-minutes: 180 # 3h
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
2 .github/workflows/benchmarks-manual.yml vendored
@@ -18,7 +18,7 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
2 .github/workflows/benchmarks-pr.yml vendored
@@ -44,7 +44,7 @@ jobs:
           exit 1
         fi

-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal

@@ -16,7 +16,7 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal

@@ -15,7 +15,7 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal

@@ -15,7 +15,7 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal

@@ -15,7 +15,7 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
2 .github/workflows/flaky-tests.yml vendored
@@ -17,7 +17,7 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
       - name: Install cargo-flaky
         run: cargo install cargo-flaky
       - name: Run cargo flaky in the dumps
2 .github/workflows/fuzzer-indexing.yml vendored
@@ -12,7 +12,7 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
2 .github/workflows/publish-apt-brew-pkg.yml vendored
@@ -25,7 +25,7 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
       - name: Install cargo-deb
         run: cargo install cargo-deb
       - uses: actions/checkout@v5
2 .github/workflows/publish-docker-images.yml vendored
@@ -65,7 +65,7 @@ jobs:
         uses: docker/setup-buildx-action@v3

       - name: Install cosign
-        uses: sigstore/cosign-installer@d58896d6a1865668819e1d91763c7751a165e159 # tag=v3.9.2
+        uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0

       - name: Login to Docker Hub
         uses: docker/login-action@v3
21 .github/workflows/publish-release-assets.yml vendored
@@ -11,7 +11,7 @@ jobs:
   check-version:
     name: Check the version validity
     runs-on: ubuntu-latest
-    # No need to check the version for dry run (cron)
+    # No need to check the version for dry run (cron or workflow_dispatch)
    steps:
       - uses: actions/checkout@v5
       # Check if the tag has the v<nmumber>.<number>.<number> format.

@@ -45,10 +45,10 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
       - name: Build
         run: cargo build --release --locked
-      # No need to upload binaries for dry run (cron)
+      # No need to upload binaries for dry run (cron or workflow_dispatch)
       - name: Upload binaries to release
         if: github.event_name == 'release'
         uses: svenstaro/upload-release-action@2.11.2

@@ -75,10 +75,10 @@ jobs:
           asset_name: meilisearch-windows-amd64.exe
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
       - name: Build
         run: cargo build --release --locked
-      # No need to upload binaries for dry run (cron)
+      # No need to upload binaries for dry run (cron or workflow_dispatch)
       - name: Upload binaries to release
         if: github.event_name == 'release'
         uses: svenstaro/upload-release-action@2.11.2

@@ -101,7 +101,7 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v5
       - name: Installing Rust toolchain
-        uses: dtolnay/rust-toolchain@1.85
+        uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
           target: ${{ matrix.target }}

@@ -111,7 +111,7 @@ jobs:
           command: build
           args: --release --target ${{ matrix.target }}
       - name: Upload the binary to release
-        # No need to upload binaries for dry run (cron)
+        # No need to upload binaries for dry run (cron or workflow_dispatch)
         if: github.event_name == 'release'
         uses: svenstaro/upload-release-action@2.11.2
         with:

@@ -148,7 +148,7 @@ jobs:
          add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
           apt-get update -y && apt-get install -y docker-ce
       - name: Installing Rust toolchain
-        uses: dtolnay/rust-toolchain@1.85
+        uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
           target: ${{ matrix.target }}

@@ -176,7 +176,7 @@ jobs:
       - name: List target output files
         run: ls -lR ./target
       - name: Upload the binary to release
-        # No need to upload binaries for dry run (cron)
+        # No need to upload binaries for dry run (cron or workflow_dispatch)
         if: github.event_name == 'release'
         uses: svenstaro/upload-release-action@2.11.2
         with:

@@ -187,6 +187,7 @@ jobs:

   publish-openapi-file:
     name: Publish OpenAPI file
+    needs: check-version
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code

@@ -201,7 +202,7 @@ jobs:
           cd crates/openapi-generator
           cargo run --release -- --pretty --output ../../meilisearch.json
       - name: Upload OpenAPI to Release
-        # No need to upload for dry run (cron)
+        # No need to upload for dry run (cron or workflow_dispatch)
         if: github.event_name == 'release'
         uses: svenstaro/upload-release-action@2.11.2
         with:
20 .github/workflows/release-drafter.yml vendored
@@ -1,20 +0,0 @@
-name: Release Drafter
-
-permissions:
-  contents: read
-  pull-requests: write
-
-on:
-  push:
-    branches:
-      - main
-
-jobs:
-  update_release_draft:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: release-drafter/release-drafter@v6
-        with:
-          config-name: release-draft-template.yml
-        env:
-          GITHUB_TOKEN: ${{ secrets.RELEASE_DRAFTER_TOKEN }}
16 .github/workflows/sdks-tests.yml vendored
@@ -50,7 +50,7 @@ jobs:
         with:
           repository: meilisearch/meilisearch-dotnet
       - name: Setup .NET Core
-        uses: actions/setup-dotnet@v4
+        uses: actions/setup-dotnet@v5
         with:
           dotnet-version: "8.0.x"
       - name: Install dependencies

@@ -100,7 +100,7 @@ jobs:
           - '7700:7700'
     steps:
       - name: Set up Go
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@v6
         with:
           go-version: stable
       - uses: actions/checkout@v5

@@ -135,13 +135,13 @@ jobs:
       - name: Set up Java
         uses: actions/setup-java@v5
         with:
-          java-version: 8
-          distribution: 'zulu'
+          java-version: 17
+          distribution: 'temurin'
           cache: gradle
       - name: Grant execute permission for gradlew
         run: chmod +x gradlew
       - name: Build and run unit and integration tests
-        run: ./gradlew build integrationTest
+        run: ./gradlew build integrationTest --info

   meilisearch-js-tests:
     needs: define-docker-image

@@ -160,7 +160,7 @@ jobs:
         with:
           repository: meilisearch/meilisearch-js
       - name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v5
         with:
           cache: 'yarn'
       - name: Install dependencies

@@ -224,7 +224,7 @@ jobs:
         with:
           repository: meilisearch/meilisearch-python
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
       - name: Install pipenv
         uses: dschep/install-pipenv-action@v1
       - name: Install dependencies

@@ -318,7 +318,7 @@ jobs:
         with:
           repository: meilisearch/meilisearch-js-plugins
       - name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v5
         with:
           cache: yarn
       - name: Install dependencies
14 .github/workflows/test-suite.yml vendored
@@ -27,7 +27,7 @@ jobs:
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
       - name: Setup test with Rust stable
-        uses: dtolnay/rust-toolchain@1.85
+        uses: dtolnay/rust-toolchain@1.89
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.8.0
       - name: Run cargo check without any default features

@@ -52,7 +52,7 @@ jobs:
       - uses: actions/checkout@v5
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.8.0
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:

@@ -77,7 +77,7 @@ jobs:
         run: |
           apt-get update
           apt-get install --assume-yes build-essential curl
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
       - name: Run cargo build with almost all features
         run: |
           cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"

@@ -129,7 +129,7 @@ jobs:
         run: |
           apt-get update
           apt-get install --assume-yes build-essential curl
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
       - name: Run cargo tree without default features and check lindera is not present
         run: |
           if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then

@@ -153,7 +153,7 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.8.0
       - name: Run tests in debug

@@ -167,7 +167,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
           components: clippy

@@ -184,7 +184,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
           toolchain: nightly-2024-07-09

@@ -18,7 +18,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
         with:
           profile: minimal
       - name: Install sd
1253 Cargo.lock generated
File diff suppressed because it is too large
Cargo.toml
@@ -23,7 +23,7 @@ members = [
 ]

 [workspace.package]
-version = "1.21.0"
+version = "1.25.0"
 authors = [
   "Quentin de Quelen <quentin@dequelen.me>",
   "Clément Renault <clement@meilisearch.com>",
Dockerfile
@@ -1,5 +1,5 @@
 # Compile
-FROM rust:1.85-alpine3.20 AS compiler
+FROM rust:1.89-alpine3.22 AS compiler

 RUN apk add -q --no-cache build-base openssl-dev

@@ -20,7 +20,7 @@ RUN set -eux; \
     cargo build --release -p meilisearch -p meilitool

 # Run
-FROM alpine:3.20
+FROM alpine:3.22
 LABEL org.opencontainers.image.source="https://github.com/meilisearch/meilisearch"

 ENV MEILI_HTTP_ADDR 0.0.0.0:7700
28 LICENSE
@@ -1,29 +1,9 @@
-MIT License
+# License

 Copyright (c) 2019-2025 Meili SAS

-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
+Part of this work fall under the Meilisearch Enterprise Edition (EE) and are licensed under the Business Source License 1.1, please refer to [LICENSE-EE](./LICENSE-EE) for details.

-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+The other parts of this work are licensed under the [MIT license](./LICENSE-MIT).

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
----
-
-🔒 Meilisearch Enterprise Edition (EE)
-
-Certain parts of this codebase are not licensed under the MIT license and governed by the Business Source License 1.1.
-
-See the LICENSE-EE file for details.
+
+`SPDX-License-Identifier: MIT AND BUSL-1.1`
21 LICENSE-MIT Normal file
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019-2025 Meili SAS
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md
@@ -39,6 +39,7 @@
 ## 🖥 Examples

 - [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
+- [**Flickr**](https://flickr.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — Search and explore one hundred million Flickr images with semantic search.
 - [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
 - [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million of songs.
 - [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.

@@ -121,7 +122,7 @@ If you want to know more about the kind of data we collect and what we use it fo

 Meilisearch is a search engine created by [Meili](https://www.meilisearch.com/careers), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)

-🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
+🗞 [Subscribe to our newsletter](https://share-eu1.hsforms.com/1LN5N0x_GQgq7ss7tXmSykwfg3aq) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.

 💌 Want to make a suggestion or give feedback? Here are some of the channels where you can reach us:
crates/dump/src/lib.rs
@@ -158,6 +158,9 @@ pub enum KindDump {
     UpgradeDatabase {
         from: (u32, u32, u32),
     },
+    IndexCompaction {
+        index_uid: String,
+    },
 }

 impl From<Task> for TaskDump {

@@ -240,6 +243,9 @@ impl From<KindWithContent> for KindDump {
             KindWithContent::UpgradeDatabase { from: version } => {
                 KindDump::UpgradeDatabase { from: version }
             }
+            KindWithContent::IndexCompaction { index_uid } => {
+                KindDump::IndexCompaction { index_uid }
+            }
         }
     }
 }
crates/dump/src/reader/compat/v2_to_v3.rs
@@ -97,6 +97,7 @@ impl CompatV2ToV3 {
     }
 }

+#[allow(clippy::large_enum_variant)]
 pub enum CompatIndexV2ToV3 {
     V2(v2::V2IndexReader),
     Compat(Box<CompatIndexV1ToV2>),
crates/file-store/src/lib.rs
@@ -60,7 +60,7 @@ impl FileStore {

     /// Returns the file corresponding to the requested uuid.
     pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> {
-        let path = self.get_update_path(uuid);
+        let path = self.update_path(uuid);
         let file = match StdFile::open(path) {
             Ok(file) => file,
             Err(e) => {

@@ -72,7 +72,7 @@ impl FileStore {
     }

     /// Returns the path that correspond to this uuid, the path could not exists.
-    pub fn get_update_path(&self, uuid: Uuid) -> PathBuf {
+    pub fn update_path(&self, uuid: Uuid) -> PathBuf {
         self.path.join(uuid.to_string())
     }
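For context, a minimal sketch of the call-site impact of this rename (the caller below is hypothetical, not part of this diff; only `FileStore::update_path` comes from it). The accessor drops its `get_` prefix, following the usual Rust naming convention for getters.

```rust
use std::path::PathBuf;
use uuid::Uuid;

// Hypothetical caller illustrating the rename; assumes the crate is
// consumed as `file_store` with `FileStore::update_path` public.
fn locate_update(store: &file_store::FileStore, uuid: Uuid) -> PathBuf {
    // Before this change this read: store.get_update_path(uuid)
    store.update_path(uuid)
}
```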
crates/filter-parser/src/condition.rs
@@ -7,23 +7,14 @@

 use nom::branch::alt;
 use nom::bytes::complete::tag;
-use nom::character::complete::char;
-use nom::character::complete::multispace0;
-use nom::character::complete::multispace1;
-use nom::combinator::cut;
-use nom::combinator::map;
-use nom::combinator::value;
-use nom::sequence::preceded;
-use nom::sequence::{terminated, tuple};
+use nom::character::complete::{char, multispace0, multispace1};
+use nom::combinator::{cut, map, value};
+use nom::sequence::{preceded, terminated, tuple};
 use Condition::*;

 use crate::error::IResultExt;
-use crate::value::parse_vector_value;
-use crate::value::parse_vector_value_cut;
-use crate::Error;
-use crate::ErrorKind;
-use crate::VectorFilter;
-use crate::{parse_value, FilterCondition, IResult, Span, Token};
+use crate::value::{parse_vector_value, parse_vector_value_cut};
+use crate::{parse_value, Error, ErrorKind, FilterCondition, IResult, Span, Token, VectorFilter};

 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Condition<'a> {

@@ -124,7 +115,7 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
     Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
 }

-fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>)> {
+fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter)> {
     let (input, _) = multispace0(input)?;
     let (input, fid) = tag("_vectors")(input)?;
crates/filter-parser/src/error.rs
@@ -75,7 +75,11 @@ pub enum ExpectedValueKind {
 pub enum ErrorKind<'a> {
     ReservedGeo(&'a str),
     GeoRadius,
+    GeoRadiusArgumentCount(usize),
     GeoBoundingBox,
+    GeoPolygon,
+    GeoPolygonNotEnoughPoints(usize),
+    GeoCoordinatesNotPair(usize),
     MisusedGeoRadius,
     MisusedGeoBoundingBox,
     VectorFilterLeftover,

@@ -189,7 +193,7 @@ impl Display for Error<'_> {
             }
             ErrorKind::InvalidPrimary => {
                 let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
-                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
+                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` {text}")?
             }
             ErrorKind::InvalidEscapedNumber => {
                 writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?

@@ -198,11 +202,23 @@ impl Display for Error<'_> {
                 writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
             }
             ErrorKind::GeoRadius => {
-                writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
+                writeln!(f, "The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.")?
+            }
+            ErrorKind::GeoRadiusArgumentCount(count) => {
+                writeln!(f, "Was expecting 3 or 4 arguments for `_geoRadius`, but instead found {count}.")?
             }
             ErrorKind::GeoBoundingBox => {
                 writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
             }
+            ErrorKind::GeoPolygon => {
+                writeln!(f, "The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.")?
+            }
+            ErrorKind::GeoPolygonNotEnoughPoints(n) => {
+                writeln!(f, "The `_geoPolygon` filter expects at least 3 points but only {n} were specified")?;
+            }
+            ErrorKind::GeoCoordinatesNotPair(number) => {
+                writeln!(f, "Was expecting 2 coordinates but instead found {number}.")?
+            }
             ErrorKind::ReservedGeo(name) => {
                 writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
             }
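A minimal sketch of how the new error variants surface to users, assuming the crate is consumed as `filter_parser` and using its public `FilterCondition::parse` entry point (the exact messages are pinned by the snapshot tests further down in this diff):

```rust
use filter_parser::FilterCondition;

fn main() {
    // "_geoRadius(1, 2)"            -> GeoRadiusArgumentCount(2): only 3 or 4 arguments are valid.
    // "_geoPolygon(1, 2, 3)"        -> GeoPolygon: coordinates must be bracketed [lat, lng] pairs.
    // "_geoPolygon([1, 2], [3, 4])" -> GeoPolygonNotEnoughPoints(2): a polygon needs at least 3 points.
    for bad in ["_geoRadius(1, 2)", "_geoPolygon(1, 2, 3)", "_geoPolygon([1, 2], [3, 4])"] {
        let err = FilterCondition::parse(bad).unwrap_err();
        println!("{bad}\n  -> {err}");
    }
}
```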
crates/filter-parser/src/lib.rs
@@ -19,6 +19,7 @@
 //! word = (alphanumeric | _ | - | .)+
 //! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
 //! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]")
+//! geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
 //! ```
 //!
 //! Other BNF grammar used to handle some specific errors:
@@ -116,7 +117,7 @@ impl<'a> Token<'a> {
         self.span
     }

-    pub fn parse_finite_float(&self) -> Result<f64, Error> {
+    pub fn parse_finite_float(&self) -> Result<f64, Error<'a>> {
         let value: f64 = self.value().parse().map_err(|e| self.as_external_error(e))?;
         if value.is_finite() {
             Ok(value)

@@ -156,8 +157,9 @@ pub enum FilterCondition<'a> {
     Or(Vec<Self>),
     And(Vec<Self>),
     VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
-    GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
+    GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a>, resolution: Option<Token<'a>> },
     GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
+    GeoPolygon { points: Vec<[Token<'a>; 2]> },
 }

 pub enum TraversedElement<'a> {
@@ -166,7 +168,7 @@ pub enum TraversedElement<'a> {
 }

 impl<'a> FilterCondition<'a> {
-    pub fn use_contains_operator(&self) -> Option<&Token> {
+    pub fn use_contains_operator(&self) -> Option<&Token<'a>> {
         match self {
             FilterCondition::Condition { fid: _, op } => match op {
                 Condition::GreaterThan(_)

@@ -189,11 +191,12 @@ impl<'a> FilterCondition<'a> {
             FilterCondition::VectorExists { .. }
             | FilterCondition::GeoLowerThan { .. }
             | FilterCondition::GeoBoundingBox { .. }
+            | FilterCondition::GeoPolygon { .. }
             | FilterCondition::In { .. } => None,
         }
     }

-    pub fn use_vector_filter(&self) -> Option<&Token> {
+    pub fn use_vector_filter(&self) -> Option<&Token<'a>> {
         match self {
             FilterCondition::Condition { .. } => None,
             FilterCondition::Not(this) => this.use_vector_filter(),

@@ -202,12 +205,13 @@ impl<'a> FilterCondition<'a> {
             }
             FilterCondition::GeoLowerThan { .. }
             | FilterCondition::GeoBoundingBox { .. }
+            | FilterCondition::GeoPolygon { .. }
             | FilterCondition::In { .. } => None,
             FilterCondition::VectorExists { fid, .. } => Some(fid),
         }
     }

-    pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token> + '_> {
+    pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token<'a>> + '_> {
         if depth == 0 {
             return Box::new(std::iter::empty());
         }

@@ -228,7 +232,7 @@ impl<'a> FilterCondition<'a> {
     }

     /// Returns the first token found at the specified depth, `None` if no token at this depth.
-    pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
+    pub fn token_at_depth(&self, depth: usize) -> Option<&Token<'a>> {
         match self {
             FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid),
             FilterCondition::Or(subfilters) => {
@@ -396,23 +400,27 @@ fn parse_not(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
|
||||
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to allow space BEFORE the _geoRadius but not after
|
||||
let parsed = preceded(
|
||||
tuple((multispace0, word_exact("_geoRadius"))),
|
||||
|
||||
let (input, _) = tuple((multispace0, word_exact("_geoRadius")))(input)?;
|
||||
|
||||
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
|
||||
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
||||
)(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoRadius)));
|
||||
|
||||
let parsed =
|
||||
delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))(input)
|
||||
.map_cut(ErrorKind::GeoRadius);
|
||||
|
||||
let (input, args) = parsed?;
|
||||
|
||||
if args.len() != 3 {
|
||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoRadius)));
|
||||
if !(3..=4).contains(&args.len()) {
|
||||
return Err(Error::failure_from_kind(input, ErrorKind::GeoRadiusArgumentCount(args.len())));
|
||||
}
|
||||
|
||||
let res = FilterCondition::GeoLowerThan {
|
||||
point: [args[0].into(), args[1].into()],
|
||||
radius: args[2].into(),
|
||||
resolution: args.get(3).cloned().map(Token::from),
|
||||
};
|
||||
|
||||
Ok((input, res))
|
||||
}
|
||||
|
||||
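A minimal usage sketch of the optional fourth `_geoRadius` argument (assuming the crate is consumed as `filter_parser`; the `{...}` token formatting below matches the snapshot tests further down in this diff):

```rust
use filter_parser::FilterCondition;

fn main() {
    // Three arguments still parse; `resolution` is simply absent.
    let f = FilterCondition::parse("_geoRadius(12, 13, 14)").unwrap().unwrap();
    assert_eq!(f.to_string(), "_geoRadius({12}, {13}, {14})");

    // A fourth argument is captured as `Some(resolution)` and round-trips through Display.
    let f = FilterCondition::parse("_geoRadius(12, 13, 14, 1000)").unwrap().unwrap();
    assert_eq!(f.to_string(), "_geoRadius({12}, {13}, {14}, {1000})");

    // Anything outside 3..=4 arguments fails with GeoRadiusArgumentCount.
    assert!(FilterCondition::parse("_geoRadius(12, 13)").is_err());
}
```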
@@ -420,26 +428,33 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
 /// If we parse `_geoBoundingBox` we MUST parse the rest of the expression.
 fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
     // we want to allow space BEFORE the _geoBoundingBox but not after
-    let parsed = preceded(
-        tuple((multispace0, word_exact("_geoBoundingBox"))),
-        // if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
-        cut(delimited(
-            char('('),
-            separated_list1(
-                tag(","),
-                ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
-            ),
-            char(')'),
-        )),
-    )(input)
-    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
-
-    let (input, args) = parsed?;
+    let (input, _) = tuple((multispace0, word_exact("_geoBoundingBox")))(input)?;
+
+    // if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
+    let (input, args) = delimited(
+        char('('),
+        separated_list1(
+            tag(","),
+            ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
+        ),
+        char(')'),
+    )(input)
+    .map_cut(ErrorKind::GeoBoundingBox)?;

-    if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
+    if args.len() != 2 {
         return Err(Error::failure_from_kind(input, ErrorKind::GeoBoundingBox));
     }

+    if let Some(offending) = args.iter().find(|a| a.len() != 2) {
+        let context = offending.first().unwrap_or(&input);
+        return Err(Error::failure_from_kind(
+            *context,
+            ErrorKind::GeoCoordinatesNotPair(offending.len()),
+        ));
+    }
+
     let res = FilterCondition::GeoBoundingBox {
         top_right_point: [args[0][0].into(), args[0][1].into()],
         bottom_left_point: [args[1][0].into(), args[1][1].into()],
@@ -447,6 +462,47 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
     Ok((input, res))
 }

+/// geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
+/// If we parse `_geoPolygon` we MUST parse the rest of the expression.
+fn parse_geo_polygon(input: Span) -> IResult<FilterCondition> {
+    // we want to allow space BEFORE the _geoPolygon but not after
+    let (input, _) = tuple((multispace0, word_exact("_geoPolygon")))(input)?;
+
+    // if we were able to parse `_geoPolygon` and can't parse the rest of the input we return a failure
+    let (input, args): (_, Vec<Vec<LocatedSpan<_, _>>>) = delimited(
+        char('('),
+        separated_list1(
+            tag(","),
+            ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
+        ),
+        preceded(opt(ws(char(','))), char(')')), // Tolerate trailing comma
+    )(input)
+    .map_cut(ErrorKind::GeoPolygon)?;
+
+    if args.len() < 3 {
+        let context = args.last().and_then(|a| a.last()).unwrap_or(&input);
+        return Err(Error::failure_from_kind(
+            *context,
+            ErrorKind::GeoPolygonNotEnoughPoints(args.len()),
+        ));
+    }
+
+    if let Some(offending) = args.iter().find(|a| a.len() != 2) {
+        let context = offending.first().unwrap_or(&input);
+        return Err(Error::failure_from_kind(
+            *context,
+            ErrorKind::GeoCoordinatesNotPair(offending.len()),
+        ));
+    }
+
+    let res = FilterCondition::GeoPolygon {
+        points: args.into_iter().map(|a| [a[0].into(), a[1].into()]).collect(),
+    };
+    Ok((input, res))
+}
+
 /// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float)
 fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
     // we want to forbid space BEFORE the _geoPoint but not after
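A sketch of the polygon rules the new parser enforces, again assuming the public `FilterCondition::parse` of the `filter_parser` crate: at least three `[lat, lng]` pairs, exactly two coordinates per pair, and a tolerated trailing comma before the closing parenthesis.

```rust
use filter_parser::FilterCondition;

fn main() {
    // Three or more bracketed pairs parse into FilterCondition::GeoPolygon.
    assert!(FilterCondition::parse("_geoPolygon([12, 13], [14, 15], [16, 17])").is_ok());

    // The trailing comma before `)` is explicitly tolerated by the grammar.
    assert!(FilterCondition::parse("_geoPolygon([12, 13], [14, 15], [16, 17],)").is_ok());

    // Fewer than 3 points -> GeoPolygonNotEnoughPoints.
    assert!(FilterCondition::parse("_geoPolygon([12, 13], [14, 15])").is_err());

    // A triple instead of a pair -> GeoCoordinatesNotPair(3).
    assert!(FilterCondition::parse("_geoPolygon([1, 2], [1, 2], [1, 2, 3])").is_err());
}
```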
@@ -516,8 +572,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
                 Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
             }),
         ),
-        parse_geo_radius,
-        parse_geo_bounding_box,
+        // Made a random block of functions because we reached the maximum number of elements per alt
+        alt((parse_geo_radius, parse_geo_bounding_box, parse_geo_polygon)),
         parse_in,
         parse_not_in,
         parse_condition,
@@ -597,9 +653,12 @@ impl std::fmt::Display for FilterCondition<'_> {
                 }
                 write!(f, " EXISTS")
             }
-            FilterCondition::GeoLowerThan { point, radius } => {
+            FilterCondition::GeoLowerThan { point, radius, resolution: None } => {
                 write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
             }
+            FilterCondition::GeoLowerThan { point, radius, resolution: Some(resolution) } => {
+                write!(f, "_geoRadius({}, {}, {}, {})", point[0], point[1], radius, resolution)
+            }
             FilterCondition::GeoBoundingBox {
                 top_right_point: top_left_point,
                 bottom_left_point: bottom_right_point,

@@ -613,6 +672,13 @@ impl std::fmt::Display for FilterCondition<'_> {
                     bottom_right_point[1]
                 )
             }
+            FilterCondition::GeoPolygon { points } => {
+                write!(f, "_geoPolygon([")?;
+                for point in points {
+                    write!(f, "[{}, {}], ", point[0], point[1])?;
+                }
+                write!(f, "])")
+            }
         }
     }
 }
@@ -651,7 +717,7 @@ pub mod tests {
     /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
     pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
         // if the string is empty we still need to return 1 for the line number
-        let lines = before.is_empty().then_some(1).unwrap_or_else(|| before.lines().count());
+        let lines = if before.is_empty() { 1 } else { before.lines().count() };
         let offset = before.chars().count();
         // the extra field is not checked in the tests so we can set it to nothing
         unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
@@ -776,12 +842,17 @@ pub mod tests {
         insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
         insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
         insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
+        insta::assert_snapshot!(p("_geoRadius(12,13,14,1000)"), @"_geoRadius({12}, {13}, {14}, {1000})");

         // Test geo bounding box
         insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
         insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
         insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");

+        // Test geo polygon
+        insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [16, 17])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{16}, {17}], ])");
+        insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [-1.2,2939.2], [1,1])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{-1.2}, {2939.2}], [{1}, {1}], ])");
+
         // Test OR + AND
         insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
         insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
@@ -838,50 +909,80 @@ pub mod tests {
         11:12 channel = 🐻 AND followers < 100
         "###);

-        insta::assert_snapshot!(p("'OR'"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
+        insta::assert_snapshot!(p("'OR'"), @r"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `\'OR\'`.
         1:5 'OR'
-        "###);
+        ");

         insta::assert_snapshot!(p("OR"), @r###"
         Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
         1:3 OR
         "###);

-        insta::assert_snapshot!(p("channel Ponce"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
+        insta::assert_snapshot!(p("channel Ponce"), @r"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `channel Ponce`.
         1:14 channel Ponce
-        "###);
+        ");

-        insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
+        insta::assert_snapshot!(p("channel = Ponce OR"), @r"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` but instead got nothing.
         19:19 channel = Ponce OR
-        "###);
+        ");

-        insta::assert_snapshot!(p("_geoRadius"), @r###"
-        The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
-        1:11 _geoRadius
-        "###);
+        insta::assert_snapshot!(p("_geoRadius"), @r"
+        The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
+        11:11 _geoRadius
+        ");

-        insta::assert_snapshot!(p("_geoRadius = 12"), @r###"
-        The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
-        1:16 _geoRadius = 12
-        "###);
+        insta::assert_snapshot!(p("_geoRadius = 12"), @r"
+        The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
+        11:16 _geoRadius = 12
+        ");

-        insta::assert_snapshot!(p("_geoBoundingBox"), @r###"
+        insta::assert_snapshot!(p("_geoBoundingBox"), @r"
         The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
-        1:16 _geoBoundingBox
-        "###);
+        16:16 _geoBoundingBox
+        ");

-        insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###"
+        insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r"
         The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
-        1:21 _geoBoundingBox = 12
-        "###);
+        16:21 _geoBoundingBox = 12
+        ");

-        insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
+        insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r"
         The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
-        1:26 _geoBoundingBox(1.0, 1.0)
-        "###);
+        17:26 _geoBoundingBox(1.0, 1.0)
+        ");

+        insta::assert_snapshot!(p("_geoPolygon([1,2,3])"), @r"
+        The `_geoPolygon` filter expects at least 3 points but only 1 were specified
+        18:19 _geoPolygon([1,2,3])
+        ");
+
+        insta::assert_snapshot!(p("_geoPolygon(1,2,3)"), @r"
+        The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
+        13:19 _geoPolygon(1,2,3)
+        ");
+
+        insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2],[1,2,3])"), @r"
+        Was expecting 2 coordinates but instead found 3.
+        26:27 _geoPolygon([1,2],[1,2],[1,2,3])
+        ");
+
+        insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2,3])"), @r"
+        The `_geoPolygon` filter expects at least 3 points but only 2 were specified
+        24:25 _geoPolygon([1,2],[1,2,3])
+        ");
+
+        insta::assert_snapshot!(p("_geoPolygon(1)"), @r"
+        The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
+        13:15 _geoPolygon(1)
+        ");
+
+        insta::assert_snapshot!(p("_geoPolygon([1,2)"), @r"
+        The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
+        17:18 _geoPolygon([1,2)
+        ");
+
         insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
         `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
@@ -938,15 +1039,15 @@ pub mod tests {
         34:35 channel = mv OR followers >= 1000)
         "###);

-        insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
+        insta::assert_snapshot!(p("colour NOT EXIST"), @r"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `colour NOT EXIST`.
         1:17 colour NOT EXIST
-        "###);
+        ");

-        insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
+        insta::assert_snapshot!(p("subscribers 100 TO1000"), @r"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `subscribers 100 TO1000`.
         1:23 subscribers 100 TO1000
-        "###);
+        ");

         insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
         Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
@@ -1071,38 +1172,38 @@ pub mod tests {
|
||||
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
|
||||
"###);
|
||||
|
||||
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
|
||||
insta::assert_snapshot!(p(r#"value NULL"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NULL`.
|
||||
1:11 value NULL
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT NULL`.
|
||||
1:15 value NOT NULL
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value EMPTY"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value EMPTY`.
|
||||
1:12 value EMPTY
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT EMPTY`.
|
||||
1:16 value NOT EMPTY
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value IS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value IS"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS`.
|
||||
1:9 value IS
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value IS NOT"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT`.
|
||||
1:13 value IS NOT
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS EXISTS`.
|
||||
1:16 value IS EXISTS
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT EXISTS`.
|
||||
1:20 value IS NOT EXISTS
|
||||
"###);
|
||||
");
|
||||
}
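The assertions above use insta's inline snapshots: the expected output lives in the source after `@`, either as a raw string (`@r###"…"###`) or, when no `#` escaping is needed, the shorter `@r"…"` form these tests migrate to while extending the operator list with `_geoPolygon`. A minimal sketch of the mechanism, assuming `insta` as a dev-dependency (the `p` helper above belongs to the filter-parser tests):

use insta::assert_snapshot;

#[test]
fn inline_snapshot_syntax() {
    // The value is rendered and compared to the inline literal;
    // `cargo insta review` rewrites the literal when the output changes.
    assert_snapshot!(format!("{} dogs", 2 + 1), @"3 dogs");
}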

#[test]

@@ -14,6 +14,7 @@ license.workspace = true
anyhow = "1.0.98"
bincode = "1.3.3"
byte-unit = "5.1.6"
bytes = "1.10.1"
bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
convert_case = "0.8.0"
@@ -32,6 +33,7 @@ rayon = "1.10.0"
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tar = "0.4.44"
synchronoise = "1.0.1"
tempfile = "3.20.0"
thiserror = "2.0.12"
@@ -45,6 +47,9 @@ tracing = "0.1.41"
ureq = "2.12.1"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
backoff = "0.4.0"
reqwest = { version = "0.12.23", features = ["rustls-tls", "http2"], default-features = false }
rusty-s3 = "0.8.1"
tokio = { version = "1.47.1", features = ["full"] }

[dev-dependencies]
big_s = "1.0.2"

@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]

use std::collections::HashMap;
use std::io;

@@ -232,6 +234,9 @@ impl<'a> Dump<'a> {
}
}
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
KindDump::IndexCompaction { index_uid } => {
KindWithContent::IndexCompaction { index_uid }
}
},
};

@@ -5,6 +5,7 @@ use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::milli::index::RollbackOutcome;
use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{heed, milli};
use reqwest::StatusCode;
use thiserror::Error;

use crate::TaskId;
@@ -127,6 +128,14 @@ pub enum Error {
#[error("Aborted task")]
AbortedTask,

#[error("S3 error: status: {status}, body: {body}")]
S3Error { status: StatusCode, body: String },
#[error("S3 HTTP error: {0}")]
S3HttpError(reqwest::Error),
#[error("S3 XML error: {0}")]
S3XmlError(Box<dyn std::error::Error + Send + Sync>),
#[error("S3 bucket error: {0}")]
S3BucketError(rusty_s3::BucketError),
#[error(transparent)]
Dump(#[from] dump::Error),
#[error(transparent)]
@@ -226,6 +235,10 @@ impl Error {
| Error::TaskCancelationWithEmptyQuery
| Error::FromRemoteWhenExporting { .. }
| Error::AbortedTask
| Error::S3Error { .. }
| Error::S3HttpError(_)
| Error::S3XmlError(_)
| Error::S3BucketError(_)
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli { .. }
@@ -293,8 +306,14 @@ impl ErrorCode for Error {
Error::BatchNotFound(_) => Code::BatchNotFound,
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
// TODO: not sure of the Code to use
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::S3Error { status, .. } if status.is_client_error() => {
Code::InvalidS3SnapshotRequest
}
Error::S3Error { .. } => Code::S3SnapshotServerError,
Error::S3HttpError(_) => Code::S3SnapshotServerError,
Error::S3XmlError(_) => Code::S3SnapshotServerError,
Error::S3BucketError(_) => Code::InvalidS3SnapshotParameters,
Error::Dump(e) => e.error_code(),
Error::Milli { error, .. } => error.error_code(),
Error::ProcessBatchPanicked(_) => Code::Internal,

@@ -199,7 +199,7 @@ impl IndexMapper {
let uuid = Uuid::new_v4();
self.index_mapping.put(&mut wtxn, name, &uuid)?;

let index_path = self.base_path.join(uuid.to_string());
let index_path = self.index_path(uuid);
fs::create_dir_all(&index_path)?;

// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
@@ -286,7 +286,7 @@ impl IndexMapper {
};

let index_map = self.index_map.clone();
let index_path = self.base_path.join(uuid.to_string());
let index_path = self.index_path(uuid);
let index_name = name.to_string();
thread::Builder::new()
.name(String::from("index_deleter"))
@@ -341,6 +341,26 @@ impl IndexMapper {
Ok(())
}

/// Closes the specified index.
///
/// This operation involves closing the underlying environment and so can take a long time to complete.
///
/// # Panics
///
/// - If the Index corresponding to the passed name is concurrently being deleted/resized or cannot be found in the
///   in-memory hash map.
pub fn close_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
let uuid = self
.index_mapping
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;

// We remove the index from the in-memory index map.
self.index_map.write().unwrap().close_for_resize(&uuid, self.enable_mdb_writemap, 0);

Ok(())
}

/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) =
@@ -388,7 +408,7 @@ impl IndexMapper {
} else {
continue;
};
let index_path = self.base_path.join(uuid.to_string());
let index_path = self.index_path(uuid);
// take the lock to reopen the environment.
reopen
.reopen(&mut self.index_map.write().unwrap(), &index_path)
@@ -405,7 +425,7 @@ impl IndexMapper {
// if it's not already there.
match index_map.get(&uuid) {
Missing => {
let index_path = self.base_path.join(uuid.to_string());
let index_path = self.index_path(uuid);

break index_map
.create(
@@ -432,6 +452,14 @@ impl IndexMapper {
Ok(index)
}

/// Returns the path of the index.
///
/// The folder located at this path contains the data.mdb,
/// the lock.mdb, and an optional data.mdb.cpy file.
pub fn index_path(&self, uuid: Uuid) -> PathBuf {
self.base_path.join(uuid.to_string())
}

pub fn rollback_index(
&self,
rtxn: &RoTxn,
@@ -472,7 +500,7 @@ impl IndexMapper {
};
}

let index_path = self.base_path.join(uuid.to_string());
let index_path = self.index_path(uuid);
Index::rollback(milli::heed::EnvOpenOptions::new().read_txn_without_tls(), index_path, to)
.map_err(|err| crate::Error::from_milli(err, Some(name.to_string())))
}

@@ -36,6 +36,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
run_loop_iteration: _,
embedders: _,
chat_settings: _,
runtime: _,
} = scheduler;

let rtxn = env.read_txn().unwrap();
@@ -317,6 +318,9 @@ fn snapshot_details(d: &Details) -> String {
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")
}
Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
}
}
}

@@ -1,3 +1,6 @@
// The main Error type is large and boxing the large variant makes the pattern matching fail
#![allow(clippy::result_large_err)]

/*!
This crate defines the index scheduler, which is responsible for:
1. Keeping references to meilisearch's indexes and mapping them to their
@@ -213,6 +216,9 @@ pub struct IndexScheduler {
/// A counter that is incremented before every call to [`tick`](IndexScheduler::tick)
#[cfg(test)]
run_loop_iteration: Arc<RwLock<usize>>,

/// The tokio runtime used for asynchronous tasks.
runtime: Option<tokio::runtime::Handle>,
}

impl IndexScheduler {
@@ -239,6 +245,7 @@ impl IndexScheduler {
run_loop_iteration: self.run_loop_iteration.clone(),
features: self.features.clone(),
chat_settings: self.chat_settings,
runtime: self.runtime.clone(),
}
}

@@ -252,13 +259,23 @@ impl IndexScheduler {
}

/// Create an index scheduler and start its run loop.
#[allow(private_interfaces)] // because test_utils is private
pub fn new(
options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32),
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
#[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
runtime: Option<tokio::runtime::Handle>,
) -> Result<Self> {
let this = Self::new_without_run(options, auth_env, from_db_version, runtime)?;

this.run();
Ok(this)
}

fn new_without_run(
options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32),
runtime: Option<tokio::runtime::Handle>,
) -> Result<Self> {
std::fs::create_dir_all(&options.tasks_path)?;
std::fs::create_dir_all(&options.update_file_path)?;
@@ -313,8 +330,7 @@ impl IndexScheduler {

wtxn.commit()?;

// allow unreachable_code to get rid of the warning in the case of a test build.
let this = Self {
Ok(Self {
processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
version,
queue,
@@ -330,21 +346,38 @@ impl IndexScheduler {
webhooks: Arc::new(webhooks),
embedders: Default::default(),

#[cfg(test)]
test_breakpoint_sdr,
#[cfg(test)]
planned_failures,
#[cfg(test)] // Will be replaced in `new_tests` in test environments
test_breakpoint_sdr: crossbeam_channel::bounded(0).0,
#[cfg(test)] // Will be replaced in `new_tests` in test environments
planned_failures: Default::default(),
#[cfg(test)]
run_loop_iteration: Arc::new(RwLock::new(0)),
features,
chat_settings,
};
runtime,
})
}

/// Create an index scheduler and start its run loop.
#[cfg(test)]
fn new_test(
options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32),
runtime: Option<tokio::runtime::Handle>,
test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
planned_failures: Vec<(usize, test_utils::FailureLocation)>,
) -> Result<Self> {
let mut this = Self::new_without_run(options, auth_env, from_db_version, runtime)?;

this.test_breakpoint_sdr = test_breakpoint_sdr;
this.planned_failures = planned_failures;

this.run();
Ok(this)
}

fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
fn read_txn(&self) -> Result<RoTxn<'_, WithoutTls>> {
self.env.read_txn().map_err(|e| e.into())
}

@@ -757,7 +790,7 @@ impl IndexScheduler {

/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(&mut self) -> Result<Dump> {
pub fn register_dumped_task(&mut self) -> Result<Dump<'_>> {
Dump::new(self)
}

@@ -806,10 +839,8 @@ impl IndexScheduler {
.queue
.tasks
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, Error::CorruptedTaskQueue)
})?;
.map_err(io::Error::other)?
.ok_or_else(|| io::Error::other(Error::CorruptedTaskQueue))?;

serde_json::to_writer(&mut self.buffer, &TaskView::from_task(&task))?;
self.buffer.push(b'\n');

@@ -75,6 +75,7 @@ make_enum_progress! {
pub enum TaskCancelationProgress {
RetrievingTasks,
CancelingUpgrade,
CleaningCompactionLeftover,
UpdatingTasks,
}
}
@@ -138,6 +139,17 @@ make_enum_progress! {
}
}

make_enum_progress! {
pub enum IndexCompaction {
RetrieveTheIndex,
CreateTemporaryFile,
CopyAndCompactTheIndex,
PersistTheCompactedIndex,
CloseTheIndex,
ReopenTheIndex,
}
}

make_enum_progress! {
pub enum InnerSwappingTwoIndexes {
RetrieveTheTasks,

@@ -310,7 +310,8 @@ impl Queue {
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();

let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
let to_delete =
RoaringBitmap::from_sorted_iter(finished.into_iter().take(100_000)).unwrap();

// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves.
@@ -326,7 +327,7 @@ impl Queue {
);

// it's safe to unwrap here because we checked the len above
let newest_task_id = to_delete.iter().last().unwrap();
let newest_task_id = to_delete.iter().next_back().unwrap();
let last_task_to_delete =
self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
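The queue cleanup above switches from collecting a reversed iterator through `FromIterator` to `RoaringBitmap::from_sorted_iter`, which requires ascending input and errors out otherwise; the newest id is then read with `next_back()` instead of `last()`. A small sketch of those two roaring APIs (version 0.10, as pinned in the Cargo.toml above):

use roaring::RoaringBitmap;

fn main() {
    // `from_sorted_iter` only accepts strictly ascending values...
    let bitmap = RoaringBitmap::from_sorted_iter(0u32..100_000).unwrap();
    // ...and rejects a descending stream like the old `.rev()` code produced.
    assert!(RoaringBitmap::from_sorted_iter((0u32..10).rev()).is_err());

    // `next_back()` reads the largest (newest) id directly from the back,
    // instead of walking the whole iterator the way `last()` does.
    assert_eq!(bitmap.iter().next_back(), Some(99_999));
}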

@@ -68,13 +68,14 @@ impl From<KindWithContent> for AutobatchKind {
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
KindWithContent::TaskCancelation { .. }
KindWithContent::IndexCompaction { .. }
| KindWithContent::TaskCancelation { .. }
| KindWithContent::TaskDeletion { .. }
| KindWithContent::DumpCreation { .. }
| KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. }
| KindWithContent::SnapshotCreation => {
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.")
}
}
}
@@ -288,7 +289,9 @@ impl BatchKind {

match (self, autobatch_kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break((this, BatchStopReason::TaskCannotBeBatched { kind, id })),
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => {
Break((this, BatchStopReason::TaskCannotBeBatched { kind, id }))
},
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exist.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break((this, BatchStopReason::IndexCreationMismatch { id }))

@@ -55,6 +55,10 @@ pub(crate) enum Batch {
UpgradeDatabase {
tasks: Vec<Task>,
},
IndexCompaction {
index_uid: String,
task: Task,
},
}

#[derive(Debug)]
@@ -66,6 +70,7 @@ pub(crate) enum DocumentOperation {

/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub(crate) enum IndexOperation {
DocumentOperation {
index_uid: String,
@@ -109,7 +114,8 @@ impl Batch {
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::Export { task }
| Batch::IndexUpdate { task, .. } => {
| Batch::IndexUpdate { task, .. }
| Batch::IndexCompaction { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
Batch::SnapshotCreation(tasks)
@@ -154,7 +160,8 @@ impl Batch {
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
| IndexDeletion { index_uid, .. }
| IndexCompaction { index_uid, .. } => Some(index_uid),
}
}
}
@@ -174,6 +181,7 @@ impl fmt::Display for Batch {
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
Batch::IndexCompaction { .. } => f.write_str("IndexCompaction")?,
Batch::Export { .. } => f.write_str("Export")?,
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
};
@@ -511,17 +519,33 @@ impl IndexScheduler {
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
}

// 3. we batch the export.
// 3. we get the next task to compact
let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued;
if let Some(task_id) = to_compact.min() {
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
kind: Kind::IndexCompaction,
id: task_id,
});
let index_uid =
task.index_uid().expect("Compaction task must have an index uid").to_owned();
return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch)));
}

// 4. we batch the export.
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
if !to_export.is_empty() {
let task_id = to_export.iter().next().expect("There must be at least one export task");
let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
current_batch.processing([&mut task]);
current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export });
current_batch
.reason(BatchStopReason::TaskCannotBeBatched { kind: Kind::Export, id: task_id });
return Ok(Some((Batch::Export { task }, current_batch)));
}

// 4. we batch the snapshot.
// 5. we batch the snapshot.
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
if !to_snapshot.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
@@ -531,7 +555,7 @@ impl IndexScheduler {
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
}

// 5. we batch the dumps.
// 6. we batch the dumps.
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
if let Some(to_dump) = to_dump.min() {
let mut task =
@@ -544,7 +568,7 @@ impl IndexScheduler {
return Ok(Some((Batch::Dump(task), current_batch)));
}

// 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
// 7. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;

@@ -25,6 +25,7 @@ use convert_case::{Case, Casing as _};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli;
use meilisearch_types::milli::update::S3SnapshotOptions;
use meilisearch_types::tasks::Status;
use process_batch::ProcessBatchInfo;
use rayon::current_num_threads;
@@ -87,11 +88,14 @@ pub struct Scheduler {

/// Snapshot compaction status.
pub(crate) experimental_no_snapshot_compaction: bool,

/// S3 Snapshot options.
pub(crate) s3_snapshot_options: Option<S3SnapshotOptions>,
}

impl Scheduler {
pub(crate) fn private_clone(&self) -> Scheduler {
Scheduler {
pub(crate) fn private_clone(&self) -> Self {
Self {
must_stop_processing: self.must_stop_processing.clone(),
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
@@ -103,23 +107,52 @@ impl Scheduler {
version_file_path: self.version_file_path.clone(),
embedding_cache_cap: self.embedding_cache_cap,
experimental_no_snapshot_compaction: self.experimental_no_snapshot_compaction,
s3_snapshot_options: self.s3_snapshot_options.clone(),
}
}

pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
let IndexSchedulerOptions {
version_file_path,
auth_path: _,
tasks_path: _,
update_file_path: _,
indexes_path: _,
snapshots_path,
dumps_path,
cli_webhook_url: _,
cli_webhook_authorization: _,
task_db_size: _,
index_base_map_size: _,
enable_mdb_writemap: _,
index_growth_amount: _,
index_count: _,
indexer_config,
autobatching_enabled,
cleanup_enabled: _,
max_number_of_tasks: _,
max_number_of_batched_tasks,
batched_tasks_size_limit,
instance_features: _,
auto_upgrade: _,
embedding_cache_cap,
experimental_no_snapshot_compaction,
} = options;

Scheduler {
must_stop_processing: MustStopProcessing::default(),
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
autobatching_enabled: options.autobatching_enabled,
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
batched_tasks_size_limit: options.batched_tasks_size_limit,
dumps_path: options.dumps_path.clone(),
snapshots_path: options.snapshots_path.clone(),
autobatching_enabled: *autobatching_enabled,
max_number_of_batched_tasks: *max_number_of_batched_tasks,
batched_tasks_size_limit: *batched_tasks_size_limit,
dumps_path: dumps_path.clone(),
snapshots_path: snapshots_path.clone(),
auth_env,
version_file_path: options.version_file_path.clone(),
embedding_cache_cap: options.embedding_cache_cap,
experimental_no_snapshot_compaction: options.experimental_no_snapshot_compaction,
version_file_path: version_file_path.clone(),
embedding_cache_cap: *embedding_cache_cap,
experimental_no_snapshot_compaction: *experimental_no_snapshot_compaction,
s3_snapshot_options: indexer_config.s3_snapshot_options.clone(),
}
}
}
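The rewritten constructor destructures `IndexSchedulerOptions` exhaustively instead of reaching through `options.*`, so adding a field to the options struct becomes a compile error here until it is explicitly handled. A contrived sketch of that pattern (the `Options` type below is hypothetical, purely for illustration):

struct Options {
    keep: u32,
    ignored: u32,
}

fn build(options: &Options) -> u32 {
    // Every field must be named (`_` marks the deliberately unused ones),
    // so a newly added field fails compilation instead of being forgotten.
    let Options { keep, ignored: _ } = options;
    *keep
}

fn main() {
    assert_eq!(build(&Options { keep: 7, ignored: 0 }), 7);
}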

@@ -1,22 +1,27 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use std::fs::{remove_file, File};
use std::io::{ErrorKind, Seek, SeekFrom};
use std::panic::{catch_unwind, AssertUnwindSafe};
use std::sync::atomic::Ordering;

use byte_unit::Byte;
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::{self, ChannelCongestion};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use milli::update::Settings as MilliSettings;
use roaring::RoaringBitmap;
use tempfile::{PersistError, TempPath};
use time::OffsetDateTime;

use super::create_batch::Batch;
use crate::processing::{
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
UpdateIndexProgress,
IndexCompaction, InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress,
TaskDeletionProgress, UpdateIndexProgress,
};
use crate::utils::{
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
@@ -24,6 +29,9 @@ use crate::utils::{
};
use crate::{Error, IndexScheduler, Result, TaskId};

/// The name of the copy of the data.mdb file used during compaction.
const DATA_MDB_COPY_NAME: &str = "data.mdb.cpy";

#[derive(Debug, Default)]
pub struct ProcessBatchInfo {
/// The write channel congestion. None when unavailable: settings update.
@@ -418,6 +426,47 @@ impl IndexScheduler {
task.status = Status::Succeeded;
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::IndexCompaction { index_uid: _, mut task } => {
let KindWithContent::IndexCompaction { index_uid } = &task.kind else {
unreachable!()
};

let rtxn = self.env.read_txn()?;
let ret = catch_unwind(AssertUnwindSafe(|| {
self.apply_compaction(&rtxn, &progress, index_uid)
}));

let (pre_size, post_size) = match ret {
Ok(Ok(stats)) => stats,
Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask),
Ok(Err(e)) => return Err(e),
Err(e) => {
let msg = match e.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match e.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
return Err(Error::Export(Box::new(Error::ProcessBatchPanicked(
msg.to_string(),
))));
}
};

task.status = Status::Succeeded;
if let Some(Details::IndexCompaction {
index_uid: _,
pre_compaction_size,
post_compaction_size,
}) = task.details.as_mut()
{
*pre_compaction_size = Some(Byte::from_u64(pre_size));
*post_compaction_size = Some(Byte::from_u64(post_size));
}

Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::Export { mut task } => {
let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind
else {
@@ -493,6 +542,92 @@ impl IndexScheduler {
}
}

fn apply_compaction(
&self,
rtxn: &RoTxn,
progress: &Progress,
index_uid: &str,
) -> Result<(u64, u64)> {
// 1. Verify that the index exists
if !self.index_mapper.index_exists(rtxn, index_uid)? {
return Err(Error::IndexNotFound(index_uid.to_owned()));
}

// 2. We retrieve the index and create a temporary file in the index directory
progress.update_progress(IndexCompaction::RetrieveTheIndex);
let index = self.index_mapper.index(rtxn, index_uid)?;

// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
self.index_mapper
.set_currently_updating_index(Some((index_uid.to_string(), index.clone())));

progress.update_progress(IndexCompaction::CreateTemporaryFile);
let src_path = index.path().join("data.mdb");
let pre_size = std::fs::metadata(&src_path)?.len();

let dst_path = TempPath::from_path(index.path().join(DATA_MDB_COPY_NAME));
let file = File::create(&dst_path)?;
let mut file = tempfile::NamedTempFile::from_parts(file, dst_path);

// 3. We copy the index data to the temporary file
progress.update_progress(IndexCompaction::CopyAndCompactTheIndex);
index
.copy_to_file(file.as_file_mut(), CompactionOption::Enabled)
.map_err(|error| Error::Milli { error, index_uid: Some(index_uid.to_string()) })?;
// ...and reset the file position as specified in the documentation
file.seek(SeekFrom::Start(0))?;

// 4. We replace the index data file with the temporary file
progress.update_progress(IndexCompaction::PersistTheCompactedIndex);
match file.persist(src_path) {
Ok(file) => file.sync_all()?,
// TODO see if we have a _resource busy_ error and probably handle this by:
// 1. closing the index, 2. replacing and 3. reopening it
Err(PersistError { error, file: _ }) => return Err(Error::IoError(error)),
};

// 5. Prepare to close the index
progress.update_progress(IndexCompaction::CloseTheIndex);

// unmark the index as the one being processed so we don't keep a handle to it, which would prevent it from closing
self.index_mapper.set_currently_updating_index(None);

self.index_mapper.close_index(rtxn, index_uid)?;
drop(index);

progress.update_progress(IndexCompaction::ReopenTheIndex);
// 6. Reopen the index
// The index will use the compacted data file when being reopened
let index = self.index_mapper.index(rtxn, index_uid)?;

// if the update processed successfully, we're going to store the new
// stats of the index. Since the tasks have already been processed and
// this is a non-critical operation, if it fails we should not fail
// the entire batch.
let res = || -> Result<_> {
let mut wtxn = self.env.write_txn()?;
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
self.index_mapper.store_stats_of(&mut wtxn, index_uid, &stats)?;
wtxn.commit()?;
Ok(stats.database_size)
}();

let post_size = match res {
Ok(post_size) => post_size,
Err(e) => {
tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
);
0
}
};

Ok((pre_size, post_size))
}

/// Swap the index `lhs` with the index `rhs`.
fn apply_index_swap(
&self,
@@ -780,9 +915,10 @@ impl IndexScheduler {

let enqueued_tasks = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;

// 0. Check if any upgrade task was matched.
// 0. Check if any upgrade or compaction tasks were matched.
// If so, we cancel all the failed or enqueued upgrade tasks.
let upgrade_tasks = &self.queue.tasks.get_kind(rtxn, Kind::UpgradeDatabase)?;
let compaction_tasks = &self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)?;
let is_canceling_upgrade = !matched_tasks.is_disjoint(upgrade_tasks);
if is_canceling_upgrade {
let failed_tasks = self.queue.tasks.get_status(rtxn, Status::Failed)?;
@@ -847,7 +983,33 @@ impl IndexScheduler {
}
}

// 3. We now have a list of tasks to cancel, cancel them
// 3. If we are cancelling a compaction task, remove the tempfiles after incomplete compactions
for compaction_task in &tasks_to_cancel & compaction_tasks {
progress.update_progress(TaskCancelationProgress::CleaningCompactionLeftover);
let task = self.queue.tasks.get_task(rtxn, compaction_task)?.unwrap();
let Some(Details::IndexCompaction {
index_uid,
pre_compaction_size: _,
post_compaction_size: _,
}) = task.details
else {
unreachable!("wrong details for compaction task {compaction_task}")
};

let index_path = match self.index_mapper.index_mapping.get(rtxn, &index_uid)? {
Some(index_uuid) => self.index_mapper.index_path(index_uuid),
None => continue,
};

if let Err(e) = remove_file(index_path.join(DATA_MDB_COPY_NAME)) {
match e.kind() {
ErrorKind::NotFound => (),
_ => return Err(Error::IoError(e)),
}
}
}

// 4. We now have a list of tasks to cancel, cancel them
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
progress.update_progress(progress_obj);

@@ -370,7 +370,7 @@ fn ureq_error_into_error(error: ureq::Error) -> Error {
}
Err(e) => e.into(),
},
ureq::Error::Transport(transport) => io::Error::new(io::ErrorKind::Other, transport).into(),
ureq::Error::Transport(transport) => io::Error::other(transport).into(),
}
}

@@ -12,6 +12,8 @@ use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
use crate::queue::TaskQueue;
use crate::{Error, IndexScheduler, Result};

const UPDATE_FILES_DIR_NAME: &str = "update_files";

/// # Safety
///
/// See [`EnvOpenOptions::open`].
@@ -78,10 +80,32 @@ impl IndexScheduler {
pub(super) fn process_snapshot(
&self,
progress: Progress,
mut tasks: Vec<Task>,
tasks: Vec<Task>,
) -> Result<Vec<Task>> {
progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);

match self.scheduler.s3_snapshot_options.clone() {
Some(options) => {
#[cfg(not(unix))]
{
let _ = options;
panic!("Non-unix platform does not support S3 snapshotting");
}
#[cfg(unix)]
self.runtime
.as_ref()
.expect("Runtime not initialized")
.block_on(self.process_snapshot_to_s3(progress, options, tasks))
}
None => self.process_snapshots_to_disk(progress, tasks),
}
}

fn process_snapshots_to_disk(
&self,
progress: Progress,
mut tasks: Vec<Task>,
) -> Result<Vec<Task>, Error> {
fs::create_dir_all(&self.scheduler.snapshots_path)?;
let temp_snapshot_dir = tempfile::tempdir()?;

@@ -128,7 +152,7 @@ impl IndexScheduler {
let rtxn = self.env.read_txn()?;

// 2.4 Create the update files directory
let update_files_dir = temp_snapshot_dir.path().join("update_files");
let update_files_dir = temp_snapshot_dir.path().join(UPDATE_FILES_DIR_NAME);
fs::create_dir_all(&update_files_dir)?;

// 2.5 Only copy the update files of the enqueued tasks
@@ -140,7 +164,7 @@ impl IndexScheduler {
let task =
self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
if let Some(content_uuid) = task.content_uuid() {
let src = self.queue.file_store.get_update_path(content_uuid);
let src = self.queue.file_store.update_path(content_uuid);
let dst = update_files_dir.join(content_uuid.to_string());
fs::copy(src, dst)?;
}
@@ -206,4 +230,403 @@ impl IndexScheduler {

Ok(tasks)
}

#[cfg(unix)]
pub(super) async fn process_snapshot_to_s3(
&self,
progress: Progress,
opts: meilisearch_types::milli::update::S3SnapshotOptions,
mut tasks: Vec<Task>,
) -> Result<Vec<Task>> {
use meilisearch_types::milli::update::S3SnapshotOptions;

let S3SnapshotOptions {
s3_bucket_url,
s3_bucket_region,
s3_bucket_name,
s3_snapshot_prefix,
s3_access_key,
s3_secret_key,
s3_max_in_flight_parts,
s3_compression_level: level,
s3_signature_duration,
s3_multipart_part_size,
} = opts;

let must_stop_processing = self.scheduler.must_stop_processing.clone();
let retry_backoff = backoff::ExponentialBackoff::default();
let db_name = {
let mut base_path = self.env.path().to_owned();
base_path.pop();
base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms").to_string()
};

let (reader, writer) = std::io::pipe()?;
let uploader_task = tokio::spawn(multipart_stream_to_s3(
s3_bucket_url,
s3_bucket_region,
s3_bucket_name,
s3_snapshot_prefix,
s3_access_key,
s3_secret_key,
s3_max_in_flight_parts,
s3_signature_duration,
s3_multipart_part_size,
must_stop_processing,
retry_backoff,
db_name,
reader,
));

let index_scheduler = IndexScheduler::private_clone(self);
let builder_task = tokio::task::spawn_blocking(move || {
stream_tarball_into_pipe(progress, level, writer, index_scheduler)
});

let (uploader_result, builder_result) = tokio::join!(uploader_task, builder_task);

// Check the uploader result first so we can return early on task abortion.
// safety: JoinHandle can return an error if the task was aborted, cancelled, or panicked.
uploader_result.unwrap()?;
builder_result.unwrap()?;

for task in &mut tasks {
task.status = Status::Succeeded;
}

Ok(tasks)
}
}
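The S3 path couples a blocking tarball producer with an async uploader through an anonymous pipe from `std::io::pipe` (stable since Rust 1.87): the builder writes the gzipped tarball into one end while the uploader drains the other. A minimal standalone sketch of the same producer/consumer shape, using plain threads instead of tokio:

use std::io::{pipe, Read, Write};
use std::thread;

fn main() -> std::io::Result<()> {
    let (mut reader, mut writer) = pipe()?;

    // The producer streams data into the write end of the pipe.
    let producer = thread::spawn(move || -> std::io::Result<()> {
        for chunk in 0..3 {
            writeln!(writer, "chunk {chunk}")?;
        }
        Ok(()) // dropping `writer` closes the pipe and unblocks the reader
    });

    // The consumer drains the read end concurrently; `read_to_string`
    // returns once the writer side has been dropped.
    let mut out = String::new();
    reader.read_to_string(&mut out)?;
    producer.join().unwrap()?;
    assert_eq!(out.lines().count(), 3);
    Ok(())
}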

/// Streams a tarball of the database content into a pipe.
#[cfg(unix)]
fn stream_tarball_into_pipe(
progress: Progress,
level: u32,
writer: std::io::PipeWriter,
index_scheduler: IndexScheduler,
) -> std::result::Result<(), Error> {
use std::io::Write as _;
use std::path::Path;

let writer = flate2::write::GzEncoder::new(writer, flate2::Compression::new(level));
let mut tarball = tar::Builder::new(writer);

// 1. Snapshot the version file
tarball
.append_path_with_name(&index_scheduler.scheduler.version_file_path, VERSION_FILE_NAME)?;

// 2. Snapshot the index scheduler LMDB env
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
let tasks_env_file = index_scheduler.env.try_clone_inner_file()?;
let path = Path::new("tasks").join("data.mdb");
append_file_to_tarball(&mut tarball, path, tasks_env_file)?;

// 2.3 Create a read transaction on the index-scheduler
let rtxn = index_scheduler.env.read_txn()?;

// 2.4 Create the update files directory
// And only copy the update files of the enqueued tasks
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
let enqueued = index_scheduler.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
progress.update_progress(update_file_progress);

// We create the update_files directory so that it
// always exists even if there are no update files
let update_files_dir = Path::new(UPDATE_FILES_DIR_NAME);
let src_update_files_dir = {
let mut path = index_scheduler.env.path().to_path_buf();
path.pop();
path.join(UPDATE_FILES_DIR_NAME)
};
tarball.append_dir(update_files_dir, src_update_files_dir)?;

for task_id in enqueued {
let task = index_scheduler
.queue
.tasks
.get_task(&rtxn, task_id)?
.ok_or(Error::CorruptedTaskQueue)?;
if let Some(content_uuid) = task.content_uuid() {
use std::fs::File;

let src = index_scheduler.queue.file_store.update_path(content_uuid);
let mut update_file = File::open(src)?;
let path = update_files_dir.join(content_uuid.to_string());
tarball.append_file(path, &mut update_file)?;
}
atomic.fetch_add(1, Ordering::Relaxed);
}

// 3. Snapshot every index
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
let index_mapping = index_scheduler.index_mapper.index_mapping;
let nb_indexes = index_mapping.len(&rtxn)? as u32;
let indexes_dir = Path::new("indexes");
let indexes_references: Vec<_> = index_scheduler
.index_mapper
.index_mapping
.iter(&rtxn)?
.map(|res| res.map_err(Error::from).map(|(name, uuid)| (name.to_string(), uuid)))
.collect::<Result<_, Error>>()?;

// It's prettier to use a for loop instead of the IndexMapper::try_for_each_index
// method, especially when we need to access the UUID, local path and index number.
for (i, (name, uuid)) in indexes_references.into_iter().enumerate() {
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
&name, i as u32, nb_indexes,
));
let path = indexes_dir.join(uuid.to_string()).join("data.mdb");
let index = index_scheduler.index_mapper.index(&rtxn, &name)?;
let index_file = index.try_clone_inner_file()?;
tracing::trace!("Appending index file for {name} in {}", path.display());
append_file_to_tarball(&mut tarball, path, index_file)?;
}

drop(rtxn);

// 4. Snapshot the auth LMDB env
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
let auth_env_file = index_scheduler.scheduler.auth_env.try_clone_inner_file()?;
let path = Path::new("auth").join("data.mdb");
append_file_to_tarball(&mut tarball, path, auth_env_file)?;

let mut gzencoder = tarball.into_inner()?;
gzencoder.flush()?;
gzencoder.try_finish()?;
let mut writer = gzencoder.finish()?;
writer.flush()?;

Result::<_, Error>::Ok(())
}

#[cfg(unix)]
fn append_file_to_tarball<W, P>(
tarball: &mut tar::Builder<W>,
path: P,
mut auth_env_file: fs::File,
) -> Result<(), Error>
where
W: std::io::Write,
P: AsRef<std::path::Path>,
{
use std::io::{Seek as _, SeekFrom};

// Note: A previous snapshot operation may have left the cursor
// at the end of the file so we need to seek to the start.
auth_env_file.seek(SeekFrom::Start(0))?;
tarball.append_file(path, &mut auth_env_file)?;
Ok(())
}

/// Streams the content read from the given reader to S3.
#[cfg(unix)]
#[allow(clippy::too_many_arguments)]
async fn multipart_stream_to_s3(
s3_bucket_url: String,
s3_bucket_region: String,
s3_bucket_name: String,
s3_snapshot_prefix: String,
s3_access_key: String,
s3_secret_key: String,
s3_max_in_flight_parts: std::num::NonZero<usize>,
s3_signature_duration: std::time::Duration,
s3_multipart_part_size: u64,
must_stop_processing: super::MustStopProcessing,
retry_backoff: backoff::exponential::ExponentialBackoff<backoff::SystemClock>,
db_name: String,
reader: std::io::PipeReader,
) -> Result<(), Error> {
use std::{collections::VecDeque, os::fd::OwnedFd, path::PathBuf};

use bytes::{Bytes, BytesMut};
use reqwest::{Client, Response};
use rusty_s3::S3Action as _;
use rusty_s3::{actions::CreateMultipartUpload, Bucket, BucketError, Credentials, UrlStyle};
use tokio::task::JoinHandle;

let reader = OwnedFd::from(reader);
let reader = tokio::net::unix::pipe::Receiver::from_owned_fd(reader)?;
let s3_snapshot_prefix = PathBuf::from(s3_snapshot_prefix);
let url =
s3_bucket_url.parse().map_err(BucketError::ParseError).map_err(Error::S3BucketError)?;
let bucket = Bucket::new(url, UrlStyle::Path, s3_bucket_name, s3_bucket_region)
.map_err(Error::S3BucketError)?;
let credential = Credentials::new(s3_access_key, s3_secret_key);

// Note for the future (rust 1.91+): use with_added_extension, it's prettier
let object_path = s3_snapshot_prefix.join(format!("{db_name}.snapshot"));
// Note: It doesn't work on Windows and if a port to this platform is needed,
// use the slash-path crate or similar to get the correct path separator.
let object = object_path.display().to_string();

let action = bucket.create_multipart_upload(Some(&credential), &object);
let url = action.sign(s3_signature_duration);

let client = Client::new();
let resp = client.post(url).send().await.map_err(Error::S3HttpError)?;
let status = resp.status();

let body = match resp.error_for_status_ref() {
Ok(_) => resp.text().await.map_err(Error::S3HttpError)?,
Err(_) => {
return Err(Error::S3Error { status, body: resp.text().await.unwrap_or_default() })
}
};

let multipart =
CreateMultipartUpload::parse_response(&body).map_err(|e| Error::S3XmlError(Box::new(e)))?;
tracing::debug!("Starting the upload of the snapshot to {object}");

// We use this bumpalo for etags strings.
let bump = bumpalo::Bump::new();
let mut etags = Vec::<&str>::new();
let mut in_flight = VecDeque::<(JoinHandle<reqwest::Result<Response>>, Bytes)>::with_capacity(
s3_max_in_flight_parts.get(),
);

// Part numbers start at 1 and cannot be larger than 10k
for part_number in 1u16.. {
if must_stop_processing.get() {
return Err(Error::AbortedTask);
}

let part_upload =
bucket.upload_part(Some(&credential), &object, part_number, multipart.upload_id());
let url = part_upload.sign(s3_signature_duration);

// Wait for a buffer to be ready if there are in-flight parts that landed
let mut buffer = if in_flight.len() >= s3_max_in_flight_parts.get() {
let (handle, buffer) = in_flight.pop_front().expect("At least one in flight request");
let resp = join_and_map_error(handle).await?;
extract_and_append_etag(&bump, &mut etags, resp.headers())?;

let mut buffer = match buffer.try_into_mut() {
Ok(buffer) => buffer,
Err(_) => unreachable!("All bytes references were consumed in the task"),
};
buffer.clear();
buffer
} else {
BytesMut::with_capacity(s3_multipart_part_size as usize)
};

// If we successfully read enough bytes,
// we can continue and send the buffer/part
while buffer.len() < (s3_multipart_part_size as usize / 2) {
// Wait for the pipe to be readable

use std::io;
reader.readable().await?;

match reader.try_read_buf(&mut buffer) {
Ok(0) => break,
// We read some bytes but maybe not enough
Ok(_) => continue,
// The readiness event is a false positive.
Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => continue,
Err(e) => return Err(e.into()),
}
}

if buffer.is_empty() {
// Break the loop if the buffer is
// empty after we tried to read bytes
break;
}

let body = buffer.freeze();
tracing::trace!("Sending part {part_number}");
let task = tokio::spawn({
let client = client.clone();
let body = body.clone();
backoff::future::retry(retry_backoff.clone(), move || {
let client = client.clone();
let url = url.clone();
let body = body.clone();
async move {
match client.put(url).body(body).send().await {
Ok(resp) if resp.status().is_client_error() => {
resp.error_for_status().map_err(backoff::Error::Permanent)
}
Ok(resp) => Ok(resp),
Err(e) => Err(backoff::Error::transient(e)),
}
}
})
});
in_flight.push_back((task, body));
}

for (handle, _buffer) in in_flight {
let resp = join_and_map_error(handle).await?;
extract_and_append_etag(&bump, &mut etags, resp.headers())?;
}

tracing::debug!("Finalizing the multipart upload");

let action = bucket.complete_multipart_upload(
Some(&credential),
&object,
multipart.upload_id(),
etags.iter().map(AsRef::as_ref),
);
let url = action.sign(s3_signature_duration);
let body = action.body();
let resp = backoff::future::retry(retry_backoff, move || {
let client = client.clone();
let url = url.clone();
let body = body.clone();
async move {
match client.post(url).body(body).send().await {
Ok(resp) if resp.status().is_client_error() => {
resp.error_for_status().map_err(backoff::Error::Permanent)
}
Ok(resp) => Ok(resp),
Err(e) => Err(backoff::Error::transient(e)),
}
}
})
.await
.map_err(Error::S3HttpError)?;

let status = resp.status();
let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;
if status.is_success() {
Ok(())
} else {
Err(Error::S3Error { status, body })
}
}
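Both the per-part PUT and the final completion POST above share one retry policy: 4xx responses are wrapped in `backoff::Error::Permanent` so retrying stops immediately, while transport errors are marked transient and retried with exponential backoff. A hedged standalone sketch of that policy (assuming the `backoff` crate with its tokio feature, plus `reqwest` and `bytes`; `put_with_retry` is a hypothetical name):

use backoff::ExponentialBackoff;

async fn put_with_retry(
    client: reqwest::Client,
    url: reqwest::Url,
    body: bytes::Bytes,
) -> Result<reqwest::Response, reqwest::Error> {
    backoff::future::retry(ExponentialBackoff::default(), move || {
        let (client, url, body) = (client.clone(), url.clone(), body.clone());
        async move {
            match client.put(url).body(body).send().await {
                // A 4xx will not get better by retrying: fail permanently.
                Ok(resp) if resp.status().is_client_error() => {
                    resp.error_for_status().map_err(backoff::Error::Permanent)
                }
                Ok(resp) => Ok(resp),
                // Connection resets, timeouts, and 5xx are worth another try.
                Err(e) => Err(backoff::Error::transient(e)),
            }
        }
    })
    .await
}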

#[cfg(unix)]
async fn join_and_map_error(
join_handle: tokio::task::JoinHandle<Result<reqwest::Response, reqwest::Error>>,
) -> Result<reqwest::Response> {
// safety: Panic happens if the task (JoinHandle) was aborted, cancelled, or panicked
let request = join_handle.await.unwrap();
let resp = request.map_err(Error::S3HttpError)?;
match resp.error_for_status_ref() {
Ok(_) => Ok(resp),
Err(_) => Err(Error::S3Error {
status: resp.status(),
body: resp.text().await.unwrap_or_default(),
}),
}
}

#[cfg(unix)]
fn extract_and_append_etag<'b>(
bump: &'b bumpalo::Bump,
etags: &mut Vec<&'b str>,
headers: &reqwest::header::HeaderMap,
) -> Result<()> {
use reqwest::header::ETAG;

let etag = headers.get(ETAG).ok_or_else(|| Error::S3XmlError("Missing ETag header".into()))?;
let etag = etag.to_str().map_err(|e| Error::S3XmlError(Box::new(e)))?;
etags.push(bump.alloc_str(etag));

Ok(())
}
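`extract_and_append_etag` leans on a `bumpalo::Bump` arena: each ETag header value is copied once into the arena, and the `Vec<&str>` later fed to `complete_multipart_upload` borrows from it, avoiding a `String` allocation per part. A tiny sketch of that idiom:

use bumpalo::Bump;

fn main() {
    let bump = Bump::new();
    let mut etags: Vec<&str> = Vec::new();

    for part in 1..=3 {
        // `alloc_str` copies the bytes into the arena and hands back a
        // reference that lives as long as `bump` itself.
        let header_value = format!("\"etag-{part}\"");
        etags.push(bump.alloc_str(&header_value));
    }

    assert_eq!(etags, ["\"etag-1\"", "\"etag-2\"", "\"etag-3\""]);
}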
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
 [timestamp] [4,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.25.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
 2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
 3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:
@@ -37,7 +37,7 @@ catto [1,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.25.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 ----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.25.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 25, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.25.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]

@@ -722,7 +722,7 @@ fn basic_get_stats() {
 let kind = index_creation_task("whalo", "fish");
 let _task = index_scheduler.register(kind, None, false).unwrap();

-snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
+snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
 {
 "indexes": {
 "catto": 1,
@@ -742,6 +742,7 @@ fn basic_get_stats() {
 "documentEdition": 0,
 "dumpCreation": 0,
 "export": 0,
+"indexCompaction": 0,
 "indexCreation": 3,
 "indexDeletion": 0,
 "indexSwap": 0,
@@ -753,10 +754,10 @@ fn basic_get_stats() {
 "upgradeDatabase": 0
 }
 }
-"#);
+"###);

 handle.advance_till([Start, BatchCreated]);
-snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
+snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
 {
 "indexes": {
 "catto": 1,
@@ -776,6 +777,7 @@ fn basic_get_stats() {
 "documentEdition": 0,
 "dumpCreation": 0,
 "export": 0,
+"indexCompaction": 0,
 "indexCreation": 3,
 "indexDeletion": 0,
 "indexSwap": 0,
@@ -787,7 +789,7 @@ fn basic_get_stats() {
 "upgradeDatabase": 0
 }
 }
-"#);
+"###);

 handle.advance_till([
 InsideProcessBatch,
@@ -797,7 +799,7 @@ fn basic_get_stats() {
 Start,
 BatchCreated,
 ]);
-snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
+snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
 {
 "indexes": {
 "catto": 1,
@@ -817,6 +819,7 @@ fn basic_get_stats() {
 "documentEdition": 0,
 "dumpCreation": 0,
 "export": 0,
+"indexCompaction": 0,
 "indexCreation": 3,
 "indexDeletion": 0,
 "indexSwap": 0,
@@ -828,7 +831,7 @@ fn basic_get_stats() {
 "upgradeDatabase": 0
 }
 }
-"#);
+"###);

 // now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
 handle.advance_till([
@@ -839,7 +842,7 @@ fn basic_get_stats() {
 Start,
 BatchCreated,
 ]);
-snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
+snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
 {
 "indexes": {
 "catto": 1,
@@ -859,6 +862,7 @@ fn basic_get_stats() {
 "documentEdition": 0,
 "dumpCreation": 0,
 "export": 0,
+"indexCompaction": 0,
 "indexCreation": 3,
 "indexDeletion": 0,
 "indexSwap": 0,
@@ -870,7 +874,7 @@ fn basic_get_stats() {
 "upgradeDatabase": 0
 }
 }
-"#);
+"###);
 }

 #[test]
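
The `@r#"` → `@r###"` churn in these snapshots is purely about raw-string delimiters: a Rust raw string ends at the first quote followed by the same number of `#`s it opened with, so an inline snapshot whose body itself contains `"#` (here, the nested `"#);`) must widen its own delimiters. A minimal standalone illustration (example string is ours, not project code):

```rust
fn main() {
    // r#"..."# would terminate at the embedded `"#`, so three hashes are
    // needed to keep the whole snippet inside one raw string literal.
    let snippet = r###"snapshot!(stats, @r#"{ "indexes": {} }"#);"###;
    assert!(snippet.ends_with(r##""#);"##));
}
```
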

@@ -126,7 +126,7 @@ impl IndexScheduler {
 std::fs::create_dir_all(&options.auth_path).unwrap();
 let auth_env = open_auth_store_env(&options.auth_path).unwrap();
 let index_scheduler =
-Self::new(options, auth_env, version, sender, planned_failures).unwrap();
+Self::new_test(options, auth_env, version, None, sender, planned_failures).unwrap();

 // To be 100% consistent between all test we're going to start the scheduler right now
 // and ensure it's in the expected starting state.

@@ -45,6 +45,9 @@ pub fn upgrade_index_scheduler(
 (1, 19, _) => 0,
 (1, 20, _) => 0,
 (1, 21, _) => 0,
+(1, 22, _) => 0,
+(1, 23, _) => 0,
+(1, 24, _) => 0,
 (major, minor, patch) => {
 if major > current_major
 || (major == current_major && minor > current_minor)
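
The new arms keep the per-version dispatch table exhaustive for every released 1.x version up to 1.24, each mapping to an explicit entry point (here `0`, i.e. no scheduler migration needed). A rough sketch of the dispatch shape — a hypothetical function, not the real one:

```rust
// Each known (major, minor, patch) maps to the index of the first
// migration to run; unknown versions are rejected by the caller.
fn first_migration(version: (u32, u32, u32)) -> Option<usize> {
    match version {
        (1, 12..=24, _) => Some(0),
        _ => None,
    }
}

fn main() {
    assert_eq!(first_migration((1, 24, 3)), Some(0));
    assert_eq!(first_migration((2, 0, 0)), None);
}
```
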

@@ -256,14 +256,15 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
 use KindWithContent as K;
 let mut index_uids = vec![];
 match &mut task.kind {
-K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
-K::DocumentEdition { index_uid, .. } => index_uids.push(index_uid),
-K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
-K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
-K::DocumentClear { index_uid } => index_uids.push(index_uid),
-K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid),
-K::IndexDeletion { index_uid } => index_uids.push(index_uid),
-K::IndexCreation { index_uid, .. } => index_uids.push(index_uid),
+K::DocumentAdditionOrUpdate { index_uid, .. }
+| K::DocumentEdition { index_uid, .. }
+| K::DocumentDeletion { index_uid, .. }
+| K::DocumentDeletionByFilter { index_uid, .. }
+| K::DocumentClear { index_uid }
+| K::SettingsUpdate { index_uid, .. }
+| K::IndexDeletion { index_uid }
+| K::IndexCreation { index_uid, .. }
+| K::IndexCompaction { index_uid, .. } => index_uids.push(index_uid),
 K::IndexUpdate { index_uid, new_index_uid, .. } => {
 index_uids.push(index_uid);
 if let Some(new_uid) = new_index_uid {
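
The rewrite above collapses eight identical arms into one or-pattern and folds the new `IndexCompaction` variant into it. The mechanism, in a self-contained sketch (toy enum, ours): each alternative must bind the same names with compatible types, after which a single arm body serves them all.

```rust
enum Kind {
    A { uid: String },
    B { uid: String, extra: u32 },
}

// Or-patterns let several variants share one arm as long as every
// alternative binds `uid`.
fn uid_of(kind: &Kind) -> &str {
    match kind {
        Kind::A { uid } | Kind::B { uid, .. } => uid,
    }
}

fn main() {
    println!("{}", uid_of(&Kind::A { uid: "catto".into() }));
}
```
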
@@ -618,6 +619,13 @@ impl crate::IndexScheduler {
 Details::UpgradeDatabase { from: _, to: _ } => {
 assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
 }
+Details::IndexCompaction {
+index_uid: _,
+pre_compaction_size: _,
+post_compaction_size: _,
+} => {
+assert_eq!(kind.as_kind(), Kind::IndexCompaction);
+}
 }
 }

@@ -17,7 +17,7 @@ impl<'a> BytesDecode<'a> for UuidCodec {
 impl BytesEncode<'_> for UuidCodec {
 type EItem = Uuid;

-fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
+fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
 Ok(Cow::Borrowed(item.as_bytes()))
 }
 }
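
`Cow<[u8]>` → `Cow<'_, [u8]>` only spells out the lifetime the original signature elided; behavior is unchanged, and newer rustc lints against hiding lifetimes in paths are presumably what motivated this batch of signature edits. A standalone sketch of the same shape:

```rust
use std::borrow::Cow;

// `Cow<'_, [u8]>` makes the borrow from `item` explicit; `Cow<[u8]>`
// means exactly the same thing but triggers elided-lifetime lints.
fn bytes_of(item: &[u8; 16]) -> Cow<'_, [u8]> {
    Cow::Borrowed(item)
}

fn main() {
    let id = [0u8; 16];
    assert_eq!(bytes_of(&id).len(), 16);
}
```
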

@@ -271,9 +271,10 @@ macro_rules! json_string {

 #[cfg(test)]
 mod tests {
-use uuid::Uuid;

 use crate as meili_snap;
 use crate::UUID_IN_MESSAGE_RE;
+use uuid::Uuid;
+
 #[test]
 fn snap() {

@@ -109,6 +109,7 @@ impl HeedAuthStore {
 Action::IndexesGet,
 Action::IndexesUpdate,
 Action::IndexesSwap,
+Action::IndexesCompact,
 ]
 .iter(),
 );
@@ -315,7 +316,9 @@ impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
 impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec {
 type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);

-fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
+fn bytes_encode(
+(key_id, action, index): &'_ Self::EItem,
+) -> StdResult<Cow<'_, [u8]>, BoxedError> {
 let mut bytes = Vec::new();

 bytes.extend_from_slice(key_id.as_bytes());

@@ -5,6 +5,7 @@ use actix_web::{self as aweb, HttpResponseBuilder};
 use aweb::http::header;
 use aweb::rt::task::JoinError;
 use convert_case::Casing;
+use milli::cellulite;
 use milli::heed::{Error as HeedError, MdbError};
 use serde::{Deserialize, Serialize};
 use utoipa::ToSchema;
@@ -239,6 +240,7 @@ InconsistentDocumentChangeHeaders , InvalidRequest , BAD_REQU
 InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
+InvalidDocumentGeojsonField , InvalidRequest , BAD_REQUEST ;
 InvalidHeaderValue , InvalidRequest , BAD_REQUEST ;
 InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
 InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@@ -256,6 +258,7 @@ InvalidIndexUid , InvalidRequest , BAD_REQU
 InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchQueryPersonalization , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ;
@@ -313,6 +316,8 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
 InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
 InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
 InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
+InvalidSearchPersonalize , InvalidRequest , BAD_REQUEST ;
+InvalidSearchPersonalizeUserContext , InvalidRequest , BAD_REQUEST ;
 InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
@@ -388,6 +393,9 @@ TooManyVectors , InvalidRequest , BAD_REQU
 UnretrievableDocument , Internal , BAD_REQUEST ;
 UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
 UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
+InvalidS3SnapshotRequest , Internal , BAD_REQUEST ;
+InvalidS3SnapshotParameters , Internal , BAD_REQUEST ;
+S3SnapshotServerError , Internal , BAD_GATEWAY ;

 // Experimental features
 VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
@@ -501,7 +509,9 @@ impl ErrorCode for milli::Error {
 Code::InvalidFacetSearchFacetName
 }
 UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
-UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
+UserError::InvalidGeoField { .. } | UserError::GeoJsonError(_) => {
+Code::InvalidDocumentGeoField
+}
 UserError::InvalidVectorDimensions { .. }
 | UserError::InvalidIndexingVectorDimensions { .. } => {
 Code::InvalidVectorDimensions
@@ -525,6 +535,17 @@ impl ErrorCode for milli::Error {
 | UserError::DocumentEditionCompilationError(_) => {
 Code::EditDocumentsByFunctionError
 }
+UserError::CelluliteError(err) => match err {
+cellulite::Error::BuildCanceled
+| cellulite::Error::VersionMismatchOnBuild(_)
+| cellulite::Error::DatabaseDoesntExists
+| cellulite::Error::Heed(_)
+| cellulite::Error::InvalidGeometry(_)
+| cellulite::Error::InternalDocIdMissing(_, _)
+| cellulite::Error::CannotConvertLineToCell(_, _, _) => Code::Internal,
+cellulite::Error::InvalidGeoJson(_) => Code::InvalidDocumentGeojsonField,
+},
+UserError::MalformedGeojson(_) => Code::InvalidDocumentGeojsonField,
 }
 }
 }
@@ -664,6 +685,18 @@ impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
 }
 }

+impl fmt::Display for deserr_codes::InvalidSearchPersonalize {
+fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+write!(f, "the value of `personalize` is invalid, expected a JSON object with `userContext` string.")
+}
+}
+
+impl fmt::Display for deserr_codes::InvalidSearchPersonalizeUserContext {
+fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+write!(f, "the value of `userContext` is invalid, expected a string.")
+}
+}
+
 #[macro_export]
 macro_rules! internal_error {
 ($target:ty : $($other:path), *) => {

@@ -380,6 +380,9 @@ pub enum Action {
 #[serde(rename = "webhooks.*")]
 #[deserr(rename = "webhooks.*")]
 WebhooksAll,
+#[serde(rename = "indexes.compact")]
+#[deserr(rename = "indexes.compact")]
+IndexesCompact,
 }

 impl Action {
@@ -398,6 +401,7 @@ impl Action {
 INDEXES_UPDATE => Some(Self::IndexesUpdate),
 INDEXES_DELETE => Some(Self::IndexesDelete),
 INDEXES_SWAP => Some(Self::IndexesSwap),
+INDEXES_COMPACT => Some(Self::IndexesCompact),
 TASKS_ALL => Some(Self::TasksAll),
 TASKS_CANCEL => Some(Self::TasksCancel),
 TASKS_DELETE => Some(Self::TasksDelete),
@@ -462,6 +466,7 @@ impl Action {
 IndexesUpdate => false,
 IndexesDelete => false,
 IndexesSwap => false,
+IndexesCompact => false,
 TasksCancel => false,
 TasksDelete => false,
 TasksGet => true,
@@ -513,6 +518,7 @@ pub mod actions {
 pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr();
 pub const INDEXES_DELETE: u8 = IndexesDelete.repr();
 pub const INDEXES_SWAP: u8 = IndexesSwap.repr();
+pub const INDEXES_COMPACT: u8 = IndexesCompact.repr();
 pub const TASKS_ALL: u8 = TasksAll.repr();
 pub const TASKS_CANCEL: u8 = TasksCancel.repr();
 pub const TASKS_DELETE: u8 = TasksDelete.repr();
@@ -614,6 +620,7 @@ pub(crate) mod test {
 assert!(WebhooksDelete.repr() == 47 && WEBHOOKS_DELETE == 47);
 assert!(WebhooksCreate.repr() == 48 && WEBHOOKS_CREATE == 48);
 assert!(WebhooksAll.repr() == 49 && WEBHOOKS_ALL == 49);
+assert!(IndexesCompact.repr() == 50 && INDEXES_COMPACT == 50);
 }

 #[test]

@@ -1,3 +1,5 @@
+#![allow(clippy::result_large_err)]
+
 pub mod batch_view;
 pub mod batches;
 pub mod compression;

@@ -346,24 +346,26 @@ impl<T> Settings<T> {
 continue;
 };

-Self::hide_secret(api_key);
+hide_secret(api_key, 0);
 }
 }
 }

-fn hide_secret(secret: &mut String) {
-match secret.len() {
-x if x < 10 => {
-secret.replace_range(.., "XXX...");
+/// Redact a secret string, starting from the `secret_offset`th byte.
+pub fn hide_secret(secret: &mut String, secret_offset: usize) {
+match secret.len().checked_sub(secret_offset) {
+None => (),
+Some(x) if x < 10 => {
+secret.replace_range(secret_offset.., "XXX...");
 }
-x if x < 20 => {
-secret.replace_range(2.., "XXXX...");
+Some(x) if x < 20 => {
+secret.replace_range((secret_offset + 2).., "XXXX...");
 }
-x if x < 30 => {
-secret.replace_range(3.., "XXXXX...");
-}
-_x => {
-secret.replace_range(5.., "XXXXXX...");
+Some(x) if x < 30 => {
+secret.replace_range((secret_offset + 3).., "XXXXX...");
 }
+Some(_x) => {
+secret.replace_range((secret_offset + 5).., "XXXXXX...");
 }
 }
 }
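
The new `secret_offset` parameter lets callers keep a known prefix (such as `Bearer `) visible while masking the rest, and `checked_sub` turns an offset past the end of the string into a no-op instead of a panic. A runnable re-statement of that logic with one usage example (values ours):

```rust
// Offset-aware redaction: short remainders are fully masked, longer ones
// keep a small prefix after `secret_offset` before the mask.
fn hide_secret(secret: &mut String, secret_offset: usize) {
    match secret.len().checked_sub(secret_offset) {
        None => (),
        Some(x) if x < 10 => secret.replace_range(secret_offset.., "XXX..."),
        Some(x) if x < 20 => secret.replace_range((secret_offset + 2).., "XXXX..."),
        Some(x) if x < 30 => secret.replace_range((secret_offset + 3).., "XXXXX..."),
        Some(_) => secret.replace_range((secret_offset + 5).., "XXXXXX..."),
    }
}

fn main() {
    let mut header = String::from("Bearer 0123456789abcdef0123456789abcdef");
    // Skip the 7-byte "Bearer " prefix; 32 bytes remain, so 5 stay visible.
    hide_secret(&mut header, "Bearer ".len());
    assert_eq!(header, "Bearer 01234XXXXXX...");
}
```
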

@@ -142,6 +142,11 @@ pub struct DetailsView {
 pub old_index_uid: Option<String>,
 #[serde(skip_serializing_if = "Option::is_none")]
 pub new_index_uid: Option<String>,
+// index compaction
+#[serde(skip_serializing_if = "Option::is_none")]
+pub pre_compaction_size: Option<String>,
+#[serde(skip_serializing_if = "Option::is_none")]
+pub post_compaction_size: Option<String>,
 }

 impl DetailsView {
@@ -314,6 +319,24 @@ impl DetailsView {
 // We should never be able to batch multiple renames at the same time.
 (Some(left), Some(_right)) => Some(left),
 },
+pre_compaction_size: match (
+self.pre_compaction_size.clone(),
+other.pre_compaction_size.clone(),
+) {
+(None, None) => None,
+(None, Some(size)) | (Some(size), None) => Some(size),
+// We should never be able to batch multiple compactions at the same time.
+(Some(left), Some(_right)) => Some(left),
+},
+post_compaction_size: match (
+self.post_compaction_size.clone(),
+other.post_compaction_size.clone(),
+) {
+(None, None) => None,
+(None, Some(size)) | (Some(size), None) => Some(size),
+// We should never be able to batch multiple compactions at the same time.
+(Some(left), Some(_right)) => Some(left),
+},
 }
 }
 }
@@ -415,6 +438,15 @@ impl From<Details> for DetailsView {
 upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
 ..Default::default()
 },
+Details::IndexCompaction { pre_compaction_size, post_compaction_size, .. } => {
+DetailsView {
+pre_compaction_size: pre_compaction_size
+.map(|size| size.get_appropriate_unit(UnitType::Both).to_string()),
+post_compaction_size: post_compaction_size
+.map(|size| size.get_appropriate_unit(UnitType::Both).to_string()),
+..Default::default()
+}
+}
 }
 }
 }

@@ -67,7 +67,8 @@ impl Task {
 | SettingsUpdate { index_uid, .. }
 | IndexCreation { index_uid, .. }
 | IndexUpdate { index_uid, .. }
-| IndexDeletion { index_uid } => Some(index_uid),
+| IndexDeletion { index_uid }
+| IndexCompaction { index_uid } => Some(index_uid),
 }
 }

@@ -94,7 +95,8 @@ impl Task {
 | KindWithContent::DumpCreation { .. }
 | KindWithContent::SnapshotCreation
 | KindWithContent::Export { .. }
-| KindWithContent::UpgradeDatabase { .. } => None,
+| KindWithContent::UpgradeDatabase { .. }
+| KindWithContent::IndexCompaction { .. } => None,
 }
 }
 }
@@ -170,6 +172,9 @@ pub enum KindWithContent {
 UpgradeDatabase {
 from: (u32, u32, u32),
 },
+IndexCompaction {
+index_uid: String,
+},
 }

 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
@@ -206,6 +211,7 @@ impl KindWithContent {
 KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
 KindWithContent::Export { .. } => Kind::Export,
 KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
+KindWithContent::IndexCompaction { .. } => Kind::IndexCompaction,
 }
 }

@@ -226,7 +232,8 @@ impl KindWithContent {
 | DocumentClear { index_uid }
 | SettingsUpdate { index_uid, .. }
 | IndexCreation { index_uid, .. }
-| IndexDeletion { index_uid } => vec![index_uid],
+| IndexDeletion { index_uid }
+| IndexCompaction { index_uid } => vec![index_uid],
 IndexUpdate { index_uid, new_index_uid, .. } => {
 let mut indexes = vec![index_uid.as_str()];
 if let Some(new_uid) = new_index_uid {
@@ -325,6 +332,11 @@ impl KindWithContent {
 versioning::VERSION_PATCH,
 ),
 }),
+KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
+index_uid: index_uid.clone(),
+pre_compaction_size: None,
+post_compaction_size: None,
+}),
 }
 }

@@ -407,6 +419,11 @@ impl KindWithContent {
 versioning::VERSION_PATCH,
 ),
 }),
+KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
+index_uid: index_uid.clone(),
+pre_compaction_size: None,
+post_compaction_size: None,
+}),
 }
 }
 }
@@ -469,6 +486,11 @@ impl From<&KindWithContent> for Option<Details> {
 versioning::VERSION_PATCH,
 ),
 }),
+KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
+index_uid: index_uid.clone(),
+pre_compaction_size: None,
+post_compaction_size: None,
+}),
 }
 }
 }
@@ -579,6 +601,7 @@ pub enum Kind {
 SnapshotCreation,
 Export,
 UpgradeDatabase,
+IndexCompaction,
 }

 impl Kind {
@@ -590,7 +613,8 @@ impl Kind {
 | Kind::SettingsUpdate
 | Kind::IndexCreation
 | Kind::IndexDeletion
-| Kind::IndexUpdate => true,
+| Kind::IndexUpdate
+| Kind::IndexCompaction => true,
 Kind::IndexSwap
 | Kind::TaskCancelation
 | Kind::TaskDeletion
@@ -618,6 +642,7 @@ impl Display for Kind {
 Kind::SnapshotCreation => write!(f, "snapshotCreation"),
 Kind::Export => write!(f, "export"),
 Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
+Kind::IndexCompaction => write!(f, "indexCompaction"),
 }
 }
 }
@@ -653,6 +678,8 @@ impl FromStr for Kind {
 Ok(Kind::Export)
 } else if kind.eq_ignore_ascii_case("upgradeDatabase") {
 Ok(Kind::UpgradeDatabase)
+} else if kind.eq_ignore_ascii_case("indexCompaction") {
+Ok(Kind::IndexCompaction)
 } else {
 Err(ParseTaskKindError(kind.to_owned()))
 }
@@ -738,6 +765,11 @@ pub enum Details {
 from: (u32, u32, u32),
 to: (u32, u32, u32),
 },
+IndexCompaction {
+index_uid: String,
+pre_compaction_size: Option<Byte>,
+post_compaction_size: Option<Byte>,
+},
 }

 #[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
@@ -800,6 +832,10 @@ impl Details {
 Self::ClearAll { deleted_documents } => *deleted_documents = Some(0),
 Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0),
 Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0),
+Self::IndexCompaction { pre_compaction_size, post_compaction_size, .. } => {
+*pre_compaction_size = None;
+*post_compaction_size = None;
+}
 Self::SettingsUpdate { .. }
 | Self::IndexInfo { .. }
 | Self::Dump { .. }

@@ -11,6 +11,24 @@ pub struct Webhook {
 pub headers: BTreeMap<String, String>,
 }

+impl Webhook {
+pub fn redact_authorization_header(&mut self) {
+// headers are case insensitive, so to make the redaction robust we iterate over qualifying headers
+// rather than getting one canonical `Authorization` header.
+for value in self
+.headers
+.iter_mut()
+.filter_map(|(name, value)| name.eq_ignore_ascii_case("authorization").then_some(value))
+{
+if value.starts_with("Bearer ") {
+crate::settings::hide_secret(value, "Bearer ".len());
+} else {
+crate::settings::hide_secret(value, 0);
+}
+}
+}
+}
+
 #[derive(Debug, Serialize, Default, Clone, PartialEq)]
 #[serde(rename_all = "camelCase")]
 pub struct WebhooksView {
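
Because HTTP header names are case-insensitive, the redaction above scans every key with `eq_ignore_ascii_case` instead of looking up one canonical `Authorization` entry. A simplified standalone sketch of the same idea (the masking itself is abbreviated; `redact` is our name):

```rust
use std::collections::BTreeMap;

// Match "authorization" in any casing, then mask everything after an
// optional "Bearer " prefix.
fn redact(headers: &mut BTreeMap<String, String>) {
    for value in headers
        .iter_mut()
        .filter_map(|(name, v)| name.eq_ignore_ascii_case("authorization").then_some(v))
    {
        let offset = if value.starts_with("Bearer ") { "Bearer ".len() } else { 0 };
        let masked = format!("{}XXX...", &value[..offset]);
        *value = masked;
    }
}

fn main() {
    let mut headers =
        BTreeMap::from([(String::from("AUTHORIZATION"), String::from("Bearer s3cr3t"))]);
    redact(&mut headers);
    assert_eq!(headers["AUTHORIZATION"], "Bearer XXX...");
}
```
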

@@ -91,7 +91,7 @@ time = { version = "0.3.41", features = [
 ] }
 tokio = { version = "1.45.1", features = ["full"] }
 toml = "0.8.23"
-uuid = { version = "1.17.0", features = ["serde", "v4"] }
+uuid = { version = "1.18.0", features = ["serde", "v4", "v7"] }
 serde_urlencoded = "0.7.1"
 termcolor = "1.4.1"
 url = { version = "2.5.4", features = ["serde"] }

@@ -205,7 +205,10 @@ struct Infos {
 experimental_no_snapshot_compaction: bool,
 experimental_no_edition_2024_for_dumps: bool,
 experimental_no_edition_2024_for_settings: bool,
+experimental_no_edition_2024_for_prefix_post_processing: bool,
+experimental_no_edition_2024_for_facet_post_processing: bool,
 experimental_vector_store_setting: bool,
+experimental_personalization: bool,
 gpu_enabled: bool,
 db_path: bool,
 import_dump: bool,
@@ -215,6 +218,7 @@ struct Infos {
 import_snapshot: bool,
 schedule_snapshot: Option<u64>,
 snapshot_dir: bool,
+uses_s3_snapshots: bool,
 ignore_missing_snapshot: bool,
 ignore_snapshot_if_db_exists: bool,
 http_addr: bool,
@@ -283,6 +287,8 @@ impl Infos {
 indexer_options,
 config_file_path,
 no_analytics: _,
+experimental_personalization_api_key,
+s3_snapshot_options,
 } = options;

 let schedule_snapshot = match schedule_snapshot {
@@ -296,6 +302,8 @@ impl Infos {
 skip_index_budget: _,
 experimental_no_edition_2024_for_settings,
 experimental_no_edition_2024_for_dumps,
+experimental_no_edition_2024_for_prefix_post_processing,
+experimental_no_edition_2024_for_facet_post_processing,
 } = indexer_options;

 let RuntimeTogglableFeatures {
@@ -344,6 +352,7 @@ impl Infos {
 import_snapshot: import_snapshot.is_some(),
 schedule_snapshot,
 snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
+uses_s3_snapshots: s3_snapshot_options.is_some(),
 ignore_missing_snapshot,
 ignore_snapshot_if_db_exists,
 http_addr: http_addr != default_http_addr(),
@@ -365,6 +374,9 @@ impl Infos {
 ssl_resumption,
 ssl_tickets,
 experimental_no_edition_2024_for_settings,
+experimental_no_edition_2024_for_prefix_post_processing,
+experimental_no_edition_2024_for_facet_post_processing,
+experimental_personalization: experimental_personalization_api_key.is_some(),
 }
 }
 }

@@ -12,6 +12,7 @@ use tokio::task::JoinError;
 use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};

 #[derive(Debug, thiserror::Error)]
+#[allow(clippy::large_enum_variant)]
 pub enum MeilisearchHttpError {
 #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
 .0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
@@ -37,6 +38,8 @@ pub enum MeilisearchHttpError {
 PaginationInFederatedQuery(usize, &'static str),
 #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")]
 FacetsInFederatedQuery(usize, String, Vec<String>),
+#[error("Inside `.queries[{0}]`: Using `.personalize` is not allowed in federated queries.\n - Hint: remove `personalize` from query #{0} or remove `federation` from the request")]
+PersonalizationInFederatedQuery(usize),
 #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")]
 InconsistentFacetOrder {
 facet: String,
@@ -136,6 +139,9 @@ impl ErrorCode for MeilisearchHttpError {
 MeilisearchHttpError::InconsistentFacetOrder { .. } => {
 Code::InvalidMultiSearchFacetOrder
 }
+MeilisearchHttpError::PersonalizationInFederatedQuery(_) => {
+Code::InvalidMultiSearchQueryPersonalization
+}
 MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
 Code::InconsistentDocumentChangeHeaders
 }

@@ -1,4 +1,6 @@
+#![allow(clippy::result_large_err)]
 #![allow(rustdoc::private_intra_doc_links)]
+
 #[macro_use]
 pub mod error;
 pub mod analytics;
@@ -9,6 +11,7 @@ pub mod middleware;
 pub mod option;
 #[cfg(test)]
 mod option_test;
+pub mod personalization;
 pub mod routes;
 pub mod search;
 pub mod search_queue;
@@ -56,6 +59,7 @@ use tracing::{error, info_span};
 use tracing_subscriber::filter::Targets;

 use crate::error::MeilisearchHttpError;
+use crate::personalization::PersonalizationService;

 /// Default number of simultaneously opened indexes.
 ///
@@ -126,12 +130,8 @@ pub type LogStderrType = tracing_subscriber::filter::Filtered<
 >;

 pub fn create_app(
-index_scheduler: Data<IndexScheduler>,
-auth_controller: Data<AuthController>,
-search_queue: Data<SearchQueue>,
+services: ServicesData,
 opt: Opt,
-logs: (LogRouteHandle, LogStderrHandle),
-analytics: Data<Analytics>,
 enable_dashboard: bool,
 ) -> actix_web::App<
 impl ServiceFactory<
@@ -143,17 +143,7 @@ pub fn create_app(
 >,
 > {
 let app = actix_web::App::new()
-.configure(|s| {
-configure_data(
-s,
-index_scheduler.clone(),
-auth_controller.clone(),
-search_queue.clone(),
-&opt,
-logs,
-analytics.clone(),
-)
-})
+.configure(|s| configure_data(s, services, &opt))
 .configure(routes::configure)
 .configure(|s| dashboard(s, enable_dashboard));

@@ -214,7 +204,10 @@ enum OnFailure {
 KeepDb,
 }

-pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
+pub fn setup_meilisearch(
+opt: &Opt,
+handle: tokio::runtime::Handle,
+) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
 let index_scheduler_opt = IndexSchedulerOptions {
 version_file_path: opt.db_path.join(VERSION_FILE_NAME),
 auth_path: opt.db_path.join("auth"),
@@ -228,7 +221,11 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
 task_db_size: opt.max_task_db_size.as_u64() as usize,
 index_base_map_size: opt.max_index_size.as_u64() as usize,
 enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
-indexer_config: Arc::new((&opt.indexer_options).try_into()?),
+indexer_config: Arc::new({
+let s3_snapshot_options =
+opt.s3_snapshot_options.clone().map(|opt| opt.try_into()).transpose()?;
+IndexerConfig { s3_snapshot_options, ..(&opt.indexer_options).try_into()? }
+}),
 autobatching_enabled: true,
 cleanup_enabled: !opt.experimental_replication_parameters,
 max_number_of_tasks: 1_000_000,
@@ -254,6 +251,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
 index_scheduler_opt,
 OnFailure::RemoveDb,
 binary_version, // the db is empty
+handle,
 )?,
 Err(e) => {
 std::fs::remove_dir_all(&opt.db_path)?;
@@ -271,7 +269,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
 bail!("snapshot doesn't exist at {}", snapshot_path.display())
 // the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
 } else {
-open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
+open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
 }
 } else if let Some(ref path) = opt.import_dump {
 let src_path_exists = path.exists();
@@ -282,6 +280,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
 index_scheduler_opt,
 OnFailure::RemoveDb,
 binary_version, // the db is empty
+handle,
 )?;
 match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
 Ok(()) => (index_scheduler, auth_controller),
@@ -302,10 +301,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
 // the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
 // or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
 } else {
-open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
+open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
 }
 } else {
-open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
+open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
 };

 // We create a loop in a thread that registers snapshotCreation tasks
@@ -336,6 +335,7 @@ fn open_or_create_database_unchecked(
 index_scheduler_opt: IndexSchedulerOptions,
 on_failure: OnFailure,
 version: (u32, u32, u32),
+handle: tokio::runtime::Handle,
 ) -> anyhow::Result<(IndexScheduler, AuthController)> {
 // we don't want to create anything in the data.ms yet, thus we
 // wrap our two builders in a closure that'll be executed later.
@@ -343,7 +343,7 @@ fn open_or_create_database_unchecked(
 let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap();
 let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key);
 let index_scheduler_builder = || -> anyhow::Result<_> {
-Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
+Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version, Some(handle))?)
 };

 match (
@@ -450,6 +450,7 @@ fn open_or_create_database(
 index_scheduler_opt: IndexSchedulerOptions,
 empty_db: bool,
 binary_version: (u32, u32, u32),
+handle: tokio::runtime::Handle,
 ) -> anyhow::Result<(IndexScheduler, AuthController)> {
 let version = if !empty_db {
 check_version(opt, &index_scheduler_opt, binary_version)?
@@ -457,7 +458,7 @@ fn open_or_create_database(
 binary_version
 };

-open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version)
+open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version, handle)
 }

 fn import_dump(
@@ -525,7 +526,11 @@ fn import_dump(
 let indexer_config = if base_config.max_threads.is_none() {
 let (thread_pool, _) = default_thread_pool_and_threads();

-let _config = IndexerConfig { thread_pool, ..*base_config };
+let _config = IndexerConfig {
+thread_pool,
+s3_snapshot_options: base_config.s3_snapshot_options.clone(),
+..*base_config
+};
 backup_config = _config;
 &backup_config
 } else {
@@ -673,23 +678,26 @@ fn import_dump(
 Ok(index_scheduler_dump.finish()?)
 }

-pub fn configure_data(
-config: &mut web::ServiceConfig,
-index_scheduler: Data<IndexScheduler>,
-auth: Data<AuthController>,
-search_queue: Data<SearchQueue>,
-opt: &Opt,
-(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
-analytics: Data<Analytics>,
-) {
+pub fn configure_data(config: &mut web::ServiceConfig, services: ServicesData, opt: &Opt) {
+let ServicesData {
+index_scheduler,
+auth,
+search_queue,
+personalization_service,
+logs_route_handle,
+logs_stderr_handle,
+analytics,
+} = services;
+
 let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
 config
 .app_data(index_scheduler)
 .app_data(auth)
 .app_data(search_queue)
 .app_data(analytics)
-.app_data(web::Data::new(logs_route))
-.app_data(web::Data::new(logs_stderr))
+.app_data(personalization_service)
+.app_data(logs_route_handle)
+.app_data(logs_stderr_handle)
 .app_data(web::Data::new(opt.clone()))
 .app_data(
 web::JsonConfig::default()
@@ -750,3 +758,14 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
 pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
 config.service(web::resource("/").route(web::get().to(routes::running)));
 }
+
+#[derive(Clone)]
+pub struct ServicesData {
+pub index_scheduler: Data<IndexScheduler>,
+pub auth: Data<AuthController>,
+pub search_queue: Data<SearchQueue>,
+pub personalization_service: Data<PersonalizationService>,
+pub logs_route_handle: Data<LogRouteHandle>,
+pub logs_stderr_handle: Data<LogStderrHandle>,
+pub analytics: Data<Analytics>,
+}
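
Grouping the `Data<T>` handles into one `Clone` struct is what lets `create_app` shrink to three parameters: `Data<T>` wraps an `Arc`, so cloning the whole bundle once per server worker is cheap. A toy sketch of the shape (stand-in fields, not the real types):

```rust
use std::sync::Arc;

// Each field stands in for a Data<...> handle; cloning copies Arc pointers.
#[derive(Clone)]
struct Services {
    scheduler: Arc<String>,
    analytics: Arc<String>,
}

fn main() {
    let services = Services {
        scheduler: Arc::new("scheduler".into()),
        analytics: Arc::new("analytics".into()),
    };
    // An HttpServer factory closure runs once per worker; the bundle moves
    // in once and is cloned for each worker.
    let factory = move || services.clone();
    let per_worker = factory();
    assert_eq!(*per_worker.scheduler, "scheduler");
}
```
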

@@ -14,10 +14,11 @@ use index_scheduler::IndexScheduler;
 use is_terminal::IsTerminal;
 use meilisearch::analytics::Analytics;
 use meilisearch::option::LogMode;
+use meilisearch::personalization::PersonalizationService;
 use meilisearch::search_queue::SearchQueue;
 use meilisearch::{
 analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
-LogStderrType, Opt, SubscriberForSecondLayer,
+LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
 };
 use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
@@ -76,7 +77,10 @@ fn on_panic(info: &std::panic::PanicHookInfo) {

 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
-try_main().await.inspect_err(|error| {
+// won't panic inside of tokio::main
+let runtime = tokio::runtime::Handle::current();
+
+try_main(runtime).await.inspect_err(|error| {
 tracing::error!(%error);
 let mut current = error.source();
 let mut depth = 0;
@@ -88,7 +92,7 @@ async fn main() -> anyhow::Result<()> {
 })
 }

-async fn try_main() -> anyhow::Result<()> {
+async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
 let (opt, config_read_from) = Opt::try_build()?;

 std::panic::set_hook(Box::new(on_panic));
@@ -122,7 +126,7 @@ async fn try_main() -> anyhow::Result<()> {
 _ => (),
 }

-let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
+let (index_scheduler, auth_controller) = setup_meilisearch(&opt, runtime)?;

 let analytics =
 analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await;
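
Capturing `tokio::runtime::Handle::current()` inside the `#[actix_web::main]` entry point and passing it down lets synchronous setup code (and, ultimately, the scheduler's S3 snapshot path) spawn work onto the already-running runtime. A minimal sketch of the pattern (assumes the `tokio` crate with its macros; names ours):

```rust
use tokio::runtime::Handle;

// Synchronous setup code can still schedule async background work when
// handed the runtime's handle.
fn setup(handle: Handle) {
    handle.spawn(async {
        // background task body, e.g. an S3 upload loop
    });
}

#[tokio::main]
async fn main() {
    // Handle::current() is valid here because we are inside the runtime.
    let runtime = Handle::current();
    setup(runtime);
}
```
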
@@ -149,8 +153,15 @@ async fn run_http(
 let enable_dashboard = &opt.env == "development";
 let opt_clone = opt.clone();
 let index_scheduler = Data::from(index_scheduler);
-let auth_controller = Data::from(auth_controller);
+let auth = Data::from(auth_controller);
 let analytics = Data::from(analytics);
+// Create personalization service with API key from options
+let personalization_service = Data::new(
+opt.experimental_personalization_api_key
+.clone()
+.map(PersonalizationService::cohere)
+.unwrap_or_else(PersonalizationService::disabled),
+);
 let search_queue = SearchQueue::new(
 opt.experimental_search_queue_size,
 available_parallelism()
@@ -162,18 +173,22 @@ async fn run_http(
 usize::from(opt.experimental_drop_search_after) as u64
 ));
 let search_queue = Data::new(search_queue);
+let (logs_route_handle, logs_stderr_handle) = logs;
+let logs_route_handle = Data::new(logs_route_handle);
+let logs_stderr_handle = Data::new(logs_stderr_handle);

-let http_server = HttpServer::new(move || {
-create_app(
-index_scheduler.clone(),
-auth_controller.clone(),
-search_queue.clone(),
-opt.clone(),
-logs.clone(),
-analytics.clone(),
-enable_dashboard,
-)
-})
+let services = ServicesData {
+index_scheduler,
+auth,
+search_queue,
+personalization_service,
+logs_route_handle,
+logs_stderr_handle,
+analytics,
+};
+
+let http_server =
+HttpServer::new(move || create_app(services.clone(), opt.clone(), enable_dashboard))
 // Disable signals allows the server to terminate immediately when a user enter CTRL-C
 .disable_signals()
 .keep_alive(KeepAlive::Os);

@@ -114,4 +114,9 @@ lazy_static! {
 "Meilisearch Task Queue Size Until Stop Registering",
 ))
 .expect("Can't create a metric");
+pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
+"meilisearch_personalized_search_requests",
+"Meilisearch number of search requests with personalization"
+))
+.expect("Can't create a metric");
 }

@@ -7,12 +7,13 @@ use std::ops::Deref;
 use std::path::PathBuf;
 use std::str::FromStr;
 use std::sync::Arc;
+use std::time::Duration;
 use std::{env, fmt, fs};

 use byte_unit::{Byte, ParseError, UnitType};
 use clap::Parser;
 use meilisearch_types::features::InstanceTogglableFeatures;
-use meilisearch_types::milli::update::IndexerConfig;
+use meilisearch_types::milli::update::{IndexerConfig, S3SnapshotOptions};
 use meilisearch_types::milli::ThreadPoolNoAbortBuilder;
 use rustls::server::{ServerSessionMemoryCache, WebPkiClientVerifier};
 use rustls::RootCertStore;
@@ -55,6 +56,10 @@ const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LO
 const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
 const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS: &str =
 "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS";
+const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
+"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
+const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
+"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
 const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
 const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
 const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
@@ -70,6 +75,22 @@ const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
 const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
 const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
 "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
+const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
+"MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";
+
+// Related to S3 snapshots
+const MEILI_S3_BUCKET_URL: &str = "MEILI_S3_BUCKET_URL";
+const MEILI_S3_BUCKET_REGION: &str = "MEILI_S3_BUCKET_REGION";
+const MEILI_S3_BUCKET_NAME: &str = "MEILI_S3_BUCKET_NAME";
+const MEILI_S3_SNAPSHOT_PREFIX: &str = "MEILI_S3_SNAPSHOT_PREFIX";
+const MEILI_S3_ACCESS_KEY: &str = "MEILI_S3_ACCESS_KEY";
+const MEILI_S3_SECRET_KEY: &str = "MEILI_S3_SECRET_KEY";
+const MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS: &str = "MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS";
+const MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL: &str = "MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL";
+const MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS: &str =
+"MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS";
+const MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE: &str = "MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE";
+
 const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
 const DEFAULT_DB_PATH: &str = "./data.ms";
 const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -79,6 +100,10 @@ const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
 const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
 const DEFAULT_SNAPSHOT_INTERVAL_SEC_STR: &str = "86400";
 const DEFAULT_DUMP_DIR: &str = "dumps/";
+const DEFAULT_S3_SNAPSHOT_MAX_IN_FLIGHT_PARTS: NonZeroUsize = NonZeroUsize::new(10).unwrap();
+const DEFAULT_S3_SNAPSHOT_COMPRESSION_LEVEL: u32 = 0;
+const DEFAULT_S3_SNAPSHOT_SIGNATURE_DURATION_SECONDS: u64 = 8 * 3600; // 8 hours
+const DEFAULT_S3_SNAPSHOT_MULTIPART_PART_SIZE: Byte = Byte::from_u64(375 * 1024 * 1024); // 375 MiB

 const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
 const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
@@ -471,10 +496,20 @@ pub struct Opt {
 #[serde(default)]
 pub experimental_no_snapshot_compaction: bool,

+/// Experimental personalization API key feature.
+///
+/// Sets the API key for personalization features.
+#[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
+pub experimental_personalization_api_key: Option<String>,
+
 #[serde(flatten)]
 #[clap(flatten)]
 pub indexer_options: IndexerOpts,

+#[serde(flatten)]
+#[clap(flatten)]
+pub s3_snapshot_options: Option<S3SnapshotOpts>,
+
 /// Set the path to a configuration file that should be used to setup the engine.
 /// Format must be TOML.
 #[clap(long)]
@@ -576,6 +611,8 @@ impl Opt {
 experimental_limit_batched_tasks_total_size,
 experimental_embedding_cache_entries,
 experimental_no_snapshot_compaction,
+experimental_personalization_api_key,
+s3_snapshot_options,
 } = self;
 export_to_env_if_not_present(MEILI_DB_PATH, db_path);
 export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -676,7 +713,22 @@ impl Opt {
 MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
 experimental_no_snapshot_compaction.to_string(),
 );
+if let Some(experimental_personalization_api_key) = experimental_personalization_api_key {
+export_to_env_if_not_present(
+MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY,
+experimental_personalization_api_key,
+);
+}
 indexer_options.export_to_env();
+if let Some(s3_snapshot_options) = s3_snapshot_options {
+#[cfg(not(unix))]
+{
+let _ = s3_snapshot_options;
+panic!("S3 snapshot options are not supported on Windows");
+}
+#[cfg(unix)]
+s3_snapshot_options.export_to_env();
+}
 }

 pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
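
The `#[cfg(not(unix))]` / `#[cfg(unix)]` pair above keeps a single call site compiling on every platform while rejecting S3 options at runtime where they are unsupported. A compilable sketch of the same gating (toy function, not the real `Opt` method):

```rust
// On unix the options are exported; elsewhere the same call site panics
// with a clear message instead of failing to compile.
fn export_s3(options: Option<String>) {
    if let Some(options) = options {
        #[cfg(not(unix))]
        {
            let _ = options;
            panic!("S3 snapshot options are not supported on Windows");
        }
        #[cfg(unix)]
        println!("exporting S3 options: {options}");
    }
}

fn main() {
    export_s3(Some(String::from("s3://bucket/prefix")));
}
```
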

@@ -772,6 +824,22 @@ pub struct IndexerOpts {
 #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS)]
 #[serde(default)]
 pub experimental_no_edition_2024_for_dumps: bool,
+
+/// Experimental no edition 2024 to compute prefixes. For more information,
+/// see: <https://github.com/orgs/meilisearch/discussions/862>
+///
+/// Enables the experimental no edition 2024 to compute prefixes.
+#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING)]
+#[serde(default)]
+pub experimental_no_edition_2024_for_prefix_post_processing: bool,
+
+/// Experimental no edition 2024 to compute facets. For more information,
+/// see: <https://github.com/orgs/meilisearch/discussions/862>
+///
+/// Enables the experimental no edition 2024 to compute facets.
+#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
+#[serde(default)]
+pub experimental_no_edition_2024_for_facet_post_processing: bool,
 }

 impl IndexerOpts {
@@ -783,6 +851,8 @@ impl IndexerOpts {
 skip_index_budget: _,
 experimental_no_edition_2024_for_settings,
 experimental_no_edition_2024_for_dumps,
+experimental_no_edition_2024_for_prefix_post_processing,
+experimental_no_edition_2024_for_facet_post_processing,
 } = self;
 if let Some(max_indexing_memory) = max_indexing_memory.0 {
 export_to_env_if_not_present(
@@ -808,6 +878,18 @@ impl IndexerOpts {
 experimental_no_edition_2024_for_dumps.to_string(),
 );
 }
+if experimental_no_edition_2024_for_prefix_post_processing {
+export_to_env_if_not_present(
+MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING,
+experimental_no_edition_2024_for_prefix_post_processing.to_string(),
+);
+}
+if experimental_no_edition_2024_for_facet_post_processing {
+export_to_env_if_not_present(
+MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING,
+experimental_no_edition_2024_for_facet_post_processing.to_string(),
+);
+}
 }
 }
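
Both `export_to_env` above and the `TryFrom` impl below start by exhaustively destructuring the options struct with no `..` rest pattern, so adding a field to `IndexerOpts` later fails to compile until every consumer handles it. In miniature (toy struct, ours):

```rust
struct Opts {
    a: bool,
    b: bool,
}

// Exhaustive destructuring: adding a field `c` to Opts breaks this line
// on purpose, so no option is ever silently dropped.
fn convert(other: &Opts) -> (bool, bool) {
    let Opts { a, b } = other;
    (*a, *b)
}

fn main() {
    assert_eq!(convert(&Opts { a: true, b: false }), (true, false));
}
```
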
 
@@ -815,6 +897,16 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
     type Error = anyhow::Error;
 
     fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
+        let IndexerOpts {
+            max_indexing_memory,
+            max_indexing_threads,
+            skip_index_budget,
+            experimental_no_edition_2024_for_settings,
+            experimental_no_edition_2024_for_dumps,
+            experimental_no_edition_2024_for_prefix_post_processing,
+            experimental_no_edition_2024_for_facet_post_processing,
+        } = other;
+
         let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
             .num_threads(other.max_indexing_threads.unwrap_or_else(|| num_cpus::get() / 2))
             .build()?;
@@ -822,17 +914,163 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
         Ok(Self {
             thread_pool,
             log_every_n: Some(DEFAULT_LOG_EVERY_N),
-            max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
-            max_threads: *other.max_indexing_threads,
+            max_memory: max_indexing_memory.map(|b| b.as_u64() as usize),
+            max_threads: max_indexing_threads.0,
             max_positions_per_attributes: None,
-            skip_index_budget: other.skip_index_budget,
-            experimental_no_edition_2024_for_settings: other
-                .experimental_no_edition_2024_for_settings,
-            experimental_no_edition_2024_for_dumps: other.experimental_no_edition_2024_for_dumps,
+            skip_index_budget: *skip_index_budget,
+            experimental_no_edition_2024_for_settings: *experimental_no_edition_2024_for_settings,
+            experimental_no_edition_2024_for_dumps: *experimental_no_edition_2024_for_dumps,
             chunk_compression_type: Default::default(),
             chunk_compression_level: Default::default(),
             documents_chunk_size: Default::default(),
             max_nb_chunks: Default::default(),
+            experimental_no_edition_2024_for_prefix_post_processing:
+                *experimental_no_edition_2024_for_prefix_post_processing,
+            experimental_no_edition_2024_for_facet_post_processing:
+                *experimental_no_edition_2024_for_facet_post_processing,
+            s3_snapshot_options: None,
         })
     }
 }
 
+#[derive(Debug, Clone, Parser, Deserialize)]
+// This group is a bit tricky but makes it possible to require all listed fields if one of them
+// is specified. It lets us keep an Option for the S3SnapshotOpts configuration.
+// <https://github.com/clap-rs/clap/issues/5092#issuecomment-2616986075>
+#[group(requires_all = ["s3_bucket_url", "s3_bucket_region", "s3_bucket_name", "s3_snapshot_prefix", "s3_access_key", "s3_secret_key"])]
+pub struct S3SnapshotOpts {
+    /// The S3 bucket URL in the format https://s3.<region>.amazonaws.com.
+    #[clap(long, env = MEILI_S3_BUCKET_URL, required = false)]
+    #[serde(default)]
+    pub s3_bucket_url: String,
+
+    /// The region in the format us-east-1.
+    #[clap(long, env = MEILI_S3_BUCKET_REGION, required = false)]
+    #[serde(default)]
+    pub s3_bucket_region: String,
+
+    /// The bucket name.
+    #[clap(long, env = MEILI_S3_BUCKET_NAME, required = false)]
+    #[serde(default)]
+    pub s3_bucket_name: String,
+
+    /// The prefix path where to put the snapshot, uses normal slashes (/).
+    #[clap(long, env = MEILI_S3_SNAPSHOT_PREFIX, required = false)]
+    #[serde(default)]
+    pub s3_snapshot_prefix: String,
+
+    /// The S3 access key.
+    #[clap(long, env = MEILI_S3_ACCESS_KEY, required = false)]
+    #[serde(default)]
+    pub s3_access_key: String,
+
+    /// The S3 secret key.
+    #[clap(long, env = MEILI_S3_SECRET_KEY, required = false)]
+    #[serde(default)]
+    pub s3_secret_key: String,
+
+    /// The maximum number of parts that can be uploaded in parallel.
+    ///
+    /// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
+    #[clap(long, env = MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS, default_value_t = default_experimental_s3_snapshot_max_in_flight_parts())]
+    #[serde(default = "default_experimental_s3_snapshot_max_in_flight_parts")]
+    pub experimental_s3_max_in_flight_parts: NonZeroUsize,
+
+    /// The compression level. Defaults to no compression (0).
+    ///
+    /// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
+    #[clap(long, env = MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL, default_value_t = default_experimental_s3_snapshot_compression_level())]
+    #[serde(default = "default_experimental_s3_snapshot_compression_level")]
+    pub experimental_s3_compression_level: u32,
+
+    /// The signature duration for the multipart upload.
+    ///
+    /// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
+    #[clap(long, env = MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS, default_value_t = default_experimental_s3_snapshot_signature_duration_seconds())]
+    #[serde(default = "default_experimental_s3_snapshot_signature_duration_seconds")]
+    pub experimental_s3_signature_duration_seconds: u64,
+
+    /// The size of the multipart parts.
+    ///
+    /// Must not be smaller than 10MiB nor larger than 8GiB. Yes,
+    /// twice the boundaries of the AWS S3 multipart upload,
+    /// because we use it a bit differently internally.
+    ///
+    /// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
+    #[clap(long, env = MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE, default_value_t = default_experimental_s3_snapshot_multipart_part_size())]
+    #[serde(default = "default_experimental_s3_snapshot_multipart_part_size")]
+    pub experimental_s3_multipart_part_size: Byte,
+}
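Editor's note: the `requires_all` group above is what lets the parent options struct hold an `Option<S3SnapshotOpts>`: each field is individually `required = false`, but naming any one of them forces all of them. A minimal, self-contained sketch of the pattern (assuming clap 4 with the derive feature; the parent struct and field names below are illustrative, not the actual Opt layout):

use clap::Parser;

#[derive(Debug, Parser)]
struct Cli {
    // `None` unless the user passes the whole S3 group.
    #[clap(flatten)]
    s3: Option<S3Opts>,
}

#[derive(Debug, clap::Args)]
#[group(requires_all = ["bucket", "access_key"])]
struct S3Opts {
    // `required = false` on each field; the group enforces all-or-nothing.
    #[clap(long, required = false)]
    bucket: String,
    #[clap(long, required = false)]
    access_key: String,
}

fn main() {
    // `--bucket b` alone errors because the group also requires `--access-key`.
    let cli = Cli::parse();
    println!("{:?}", cli.s3);
}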
+
+impl S3SnapshotOpts {
+    /// Exports the values to their corresponding env vars if they are not set.
+    pub fn export_to_env(self) {
+        let S3SnapshotOpts {
+            s3_bucket_url,
+            s3_bucket_region,
+            s3_bucket_name,
+            s3_snapshot_prefix,
+            s3_access_key,
+            s3_secret_key,
+            experimental_s3_max_in_flight_parts,
+            experimental_s3_compression_level,
+            experimental_s3_signature_duration_seconds,
+            experimental_s3_multipart_part_size,
+        } = self;
+
+        export_to_env_if_not_present(MEILI_S3_BUCKET_URL, s3_bucket_url);
+        export_to_env_if_not_present(MEILI_S3_BUCKET_REGION, s3_bucket_region);
+        export_to_env_if_not_present(MEILI_S3_BUCKET_NAME, s3_bucket_name);
+        export_to_env_if_not_present(MEILI_S3_SNAPSHOT_PREFIX, s3_snapshot_prefix);
+        export_to_env_if_not_present(MEILI_S3_ACCESS_KEY, s3_access_key);
+        export_to_env_if_not_present(MEILI_S3_SECRET_KEY, s3_secret_key);
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS,
+            experimental_s3_max_in_flight_parts.to_string(),
+        );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL,
+            experimental_s3_compression_level.to_string(),
+        );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS,
+            experimental_s3_signature_duration_seconds.to_string(),
+        );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE,
+            experimental_s3_multipart_part_size.to_string(),
+        );
+    }
+}
+
+impl TryFrom<S3SnapshotOpts> for S3SnapshotOptions {
+    type Error = anyhow::Error;
+
+    fn try_from(other: S3SnapshotOpts) -> Result<Self, Self::Error> {
+        let S3SnapshotOpts {
+            s3_bucket_url,
+            s3_bucket_region,
+            s3_bucket_name,
+            s3_snapshot_prefix,
+            s3_access_key,
+            s3_secret_key,
+            experimental_s3_max_in_flight_parts,
+            experimental_s3_compression_level,
+            experimental_s3_signature_duration_seconds,
+            experimental_s3_multipart_part_size,
+        } = other;
+
+        Ok(S3SnapshotOptions {
+            s3_bucket_url,
+            s3_bucket_region,
+            s3_bucket_name,
+            s3_snapshot_prefix,
+            s3_access_key,
+            s3_secret_key,
+            s3_max_in_flight_parts: experimental_s3_max_in_flight_parts,
+            s3_compression_level: experimental_s3_compression_level,
+            s3_signature_duration: Duration::from_secs(experimental_s3_signature_duration_seconds),
+            s3_multipart_part_size: experimental_s3_multipart_part_size.as_u64(),
+        })
+    }
+}
@@ -1051,6 +1289,22 @@ fn default_snapshot_interval_sec() -> &'static str {
     DEFAULT_SNAPSHOT_INTERVAL_SEC_STR
 }
 
+fn default_experimental_s3_snapshot_max_in_flight_parts() -> NonZeroUsize {
+    DEFAULT_S3_SNAPSHOT_MAX_IN_FLIGHT_PARTS
+}
+
+fn default_experimental_s3_snapshot_compression_level() -> u32 {
+    DEFAULT_S3_SNAPSHOT_COMPRESSION_LEVEL
+}
+
+fn default_experimental_s3_snapshot_signature_duration_seconds() -> u64 {
+    DEFAULT_S3_SNAPSHOT_SIGNATURE_DURATION_SECONDS
+}
+
+fn default_experimental_s3_snapshot_multipart_part_size() -> Byte {
+    DEFAULT_S3_SNAPSHOT_MULTIPART_PART_SIZE
+}
+
 fn default_dump_dir() -> PathBuf {
     PathBuf::from(DEFAULT_DUMP_DIR)
 }
crates/meilisearch/src/personalization/mod.rs — new file, 366 lines
@@ -0,0 +1,366 @@
use crate::search::{Personalize, SearchResult};
use meilisearch_types::{
    error::{Code, ErrorCode, ResponseError},
    milli::TimeBudget,
};
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use tracing::{debug, info, warn};

const COHERE_API_URL: &str = "https://api.cohere.ai/v1/rerank";
const MAX_RETRIES: u32 = 10;

#[derive(Debug, thiserror::Error)]
enum PersonalizationError {
    #[error("Personalization service: HTTP request failed: {0}")]
    Request(#[from] reqwest::Error),
    #[error("Personalization service: Failed to parse response: {0}")]
    Parse(String),
    #[error("Personalization service: Cohere API error: {0}")]
    Api(String),
    #[error("Personalization service: Unauthorized: invalid API key")]
    Unauthorized,
    #[error("Personalization service: Rate limited: too many requests")]
    RateLimited,
    #[error("Personalization service: Bad request: {0}")]
    BadRequest(String),
    #[error("Personalization service: Internal server error: {0}")]
    InternalServerError(String),
    #[error("Personalization service: Network error: {0}")]
    Network(String),
    #[error("Personalization service: Deadline exceeded")]
    DeadlineExceeded,
    #[error(transparent)]
    FeatureNotEnabled(#[from] index_scheduler::error::FeatureNotEnabledError),
}

impl ErrorCode for PersonalizationError {
    fn error_code(&self) -> Code {
        match self {
            PersonalizationError::FeatureNotEnabled { .. } => Code::FeatureNotEnabled,
            PersonalizationError::Unauthorized => Code::RemoteInvalidApiKey,
            PersonalizationError::RateLimited => Code::TooManySearchRequests,
            PersonalizationError::BadRequest(_) => Code::RemoteBadRequest,
            PersonalizationError::InternalServerError(_) => Code::RemoteRemoteError,
            PersonalizationError::Network(_) | PersonalizationError::Request(_) => {
                Code::RemoteCouldNotSendRequest
            }
            PersonalizationError::Parse(_) | PersonalizationError::Api(_) => {
                Code::RemoteBadResponse
            }
            PersonalizationError::DeadlineExceeded => Code::Internal, // should not be returned to the client
        }
    }
}

pub struct CohereService {
    client: Client,
    api_key: String,
}

impl CohereService {
    pub fn new(api_key: String) -> Self {
        info!("Personalization service initialized with Cohere API");
        let client = Client::builder()
            .timeout(Duration::from_secs(30))
            .build()
            .expect("Failed to create HTTP client");
        Self { client, api_key }
    }

    pub async fn rerank_search_results(
        &self,
        search_result: SearchResult,
        personalize: &Personalize,
        query: Option<&str>,
        time_budget: TimeBudget,
    ) -> Result<SearchResult, ResponseError> {
        if time_budget.exceeded() {
            warn!("Could not rerank due to deadline");
            // If the deadline is exceeded, return the original search result instead of an error
            return Ok(search_result);
        }

        // Extract user context from personalization
        let user_context = personalize.user_context.as_str();

        // Build the prompt by merging query and user context
        let prompt = match query {
            Some(q) => format!("User Context: {user_context}\nQuery: {q}"),
            None => format!("User Context: {user_context}"),
        };

        // Extract documents for reranking
        let documents: Vec<String> = search_result
            .hits
            .iter()
            .map(|hit| {
                // Convert the document to a string representation for reranking
                serde_json::to_string(&hit.document).unwrap_or_else(|_| "{}".to_string())
            })
            .collect();

        if documents.is_empty() {
            return Ok(search_result);
        }

        // Call Cohere's rerank API with retry logic
        let reranked_indices =
            match self.call_rerank_with_retry(&prompt, &documents, time_budget).await {
                Ok(indices) => indices,
                Err(PersonalizationError::DeadlineExceeded) => {
                    // If the deadline is exceeded, return the original search result instead of an error
                    return Ok(search_result);
                }
                Err(e) => return Err(e.into()),
            };

        debug!("Cohere rerank successful, reordering {} results", search_result.hits.len());

        // Reorder the hits based on Cohere's reranking
        let mut reranked_hits = Vec::new();
        for index in reranked_indices.iter() {
            if let Some(hit) = search_result.hits.get(*index) {
                reranked_hits.push(hit.clone());
            }
        }

        Ok(SearchResult { hits: reranked_hits, ..search_result })
    }

    async fn call_rerank_with_retry(
        &self,
        query: &str,
        documents: &[String],
        time_budget: TimeBudget,
    ) -> Result<Vec<usize>, PersonalizationError> {
        let request_body = CohereRerankRequest {
            query: query.to_string(),
            documents: documents.to_vec(),
            model: "rerank-english-v3.0".to_string(),
        };

        // Retry loop similar to vector extraction
        for attempt in 0..MAX_RETRIES {
            let response_result = self.send_rerank_request(&request_body).await;

            let retry_duration = match self.handle_response(response_result).await {
                Ok(indices) => return Ok(indices),
                Err(retry) => {
                    warn!("Cohere rerank attempt #{} failed: {}", attempt, retry.error);

                    if time_budget.exceeded() {
                        warn!("Could not rerank due to deadline");
                        return Err(PersonalizationError::DeadlineExceeded);
                    } else {
                        match retry.into_duration(attempt) {
                            Ok(d) => d,
                            Err(error) => return Err(error),
                        }
                    }
                }
            };

            // randomly up to double the retry duration
            let retry_duration = retry_duration
                + rand::thread_rng().gen_range(std::time::Duration::ZERO..retry_duration);

            warn!("Retrying after {}ms", retry_duration.as_millis());
            tokio::time::sleep(retry_duration).await;
        }

        // Final attempt without retry
        let response_result = self.send_rerank_request(&request_body).await;

        match self.handle_response(response_result).await {
            Ok(indices) => Ok(indices),
            Err(retry) => Err(retry.into_error()),
        }
    }

    async fn send_rerank_request(
        &self,
        request_body: &CohereRerankRequest,
    ) -> Result<reqwest::Response, reqwest::Error> {
        self.client
            .post(COHERE_API_URL)
            .header("Authorization", format!("Bearer {}", self.api_key))
            .header("Content-Type", "application/json")
            .json(request_body)
            .send()
            .await
    }

    async fn handle_response(
        &self,
        response_result: Result<reqwest::Response, reqwest::Error>,
    ) -> Result<Vec<usize>, Retry> {
        let response = match response_result {
            Ok(r) => r,
            Err(e) if e.is_timeout() => {
                return Err(Retry::retry_later(PersonalizationError::Network(format!(
                    "Request timeout: {}",
                    e
                ))));
            }
            Err(e) => {
                return Err(Retry::retry_later(PersonalizationError::Network(format!(
                    "Network error: {}",
                    e
                ))));
            }
        };

        let status = response.status();
        let status_code = status.as_u16();

        if status.is_success() {
            let rerank_response: CohereRerankResponse = match response.json().await {
                Ok(r) => r,
                Err(e) => {
                    return Err(Retry::retry_later(PersonalizationError::Parse(format!(
                        "Failed to parse response: {}",
                        e
                    ))));
                }
            };

            // Extract indices from rerank results
            let indices: Vec<usize> =
                rerank_response.results.iter().map(|result| result.index as usize).collect();

            return Ok(indices);
        }

        // Handle error status codes
        let error_body = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());

        let retry = match status_code {
            401 => Retry::give_up(PersonalizationError::Unauthorized),
            429 => Retry::rate_limited(PersonalizationError::RateLimited),
            400 => Retry::give_up(PersonalizationError::BadRequest(error_body)),
            500..=599 => Retry::retry_later(PersonalizationError::InternalServerError(format!(
                "Status {}: {}",
                status_code, error_body
            ))),
            402..=499 => Retry::give_up(PersonalizationError::Api(format!(
                "Status {}: {}",
                status_code, error_body
            ))),
            _ => Retry::retry_later(PersonalizationError::Api(format!(
                "Unexpected status {}: {}",
                status_code, error_body
            ))),
        };

        Err(retry)
    }
}

#[derive(Serialize)]
struct CohereRerankRequest {
    query: String,
    documents: Vec<String>,
    model: String,
}

#[derive(Deserialize)]
struct CohereRerankResponse {
    results: Vec<CohereRerankResult>,
}

#[derive(Deserialize)]
struct CohereRerankResult {
    index: u32,
}
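Editor's note: for reference, the wire shapes these two structs imply, sketched from the fields alone (Cohere's real rerank responses carry additional fields such as relevance scores; serde simply ignores anything not declared here):

// In-module sketch; illustrative values only.
let request = serde_json::json!({
    "query": "User Context: likes space operas\nQuery: dune",
    "documents": ["{\"title\":\"Dune\"}", "{\"title\":\"Heat\"}"],
    "model": "rerank-english-v3.0",
});
let response: CohereRerankResponse = serde_json::from_value(serde_json::json!({
    "results": [{ "index": 1 }, { "index": 0 }]
}))
.unwrap();
// With this response, hits would be reordered to [documents[1], documents[0]].
assert_eq!(response.results[0].index, 1);
let _ = request;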

// Retry strategy similar to vector extraction
struct Retry {
    error: PersonalizationError,
    strategy: RetryStrategy,
}

enum RetryStrategy {
    GiveUp,
    Retry,
    RetryAfterRateLimit,
}

impl Retry {
    fn give_up(error: PersonalizationError) -> Self {
        Self { error, strategy: RetryStrategy::GiveUp }
    }

    fn retry_later(error: PersonalizationError) -> Self {
        Self { error, strategy: RetryStrategy::Retry }
    }

    fn rate_limited(error: PersonalizationError) -> Self {
        Self { error, strategy: RetryStrategy::RetryAfterRateLimit }
    }

    fn into_duration(self, attempt: u32) -> Result<Duration, PersonalizationError> {
        match self.strategy {
            RetryStrategy::GiveUp => Err(self.error),
            RetryStrategy::Retry => {
                // Exponential backoff: 10^attempt milliseconds
                Ok(Duration::from_millis((10u64).pow(attempt)))
            }
            RetryStrategy::RetryAfterRateLimit => {
                // Longer backoff for rate limits: 100ms + exponential
                Ok(Duration::from_millis(100 + (10u64).pow(attempt)))
            }
        }
    }

    fn into_error(self) -> PersonalizationError {
        self.error
    }
}
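Editor's note: taken together with the jitter applied in `call_rerank_with_retry`, the effective wait before retry n+1 is a base of 10^n ms (plus 100ms flat when rate limited), then a random extra of up to the base, i.e. a draw from [base, 2*base). A standalone sketch of that schedule (assumed equivalent behavior, not the production code):

use rand::Rng;
use std::time::Duration;

// Base 10^attempt ms, +100ms for rate limits, then jittered into [base, 2*base).
fn backoff(attempt: u32, rate_limited: bool) -> Duration {
    let base_ms = 10u64.pow(attempt) + if rate_limited { 100 } else { 0 };
    let base = Duration::from_millis(base_ms);
    base + rand::thread_rng().gen_range(Duration::ZERO..base)
}

fn main() {
    // Attempt 0 waits 1-2ms, attempt 3 waits 1-2s; by attempt 9 the base is days,
    // which is why the deadline check short-circuits the loop long before that.
    for attempt in 0..4 {
        println!("attempt {attempt}: {:?}", backoff(attempt, false));
    }
}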

pub enum PersonalizationService {
    Cohere(CohereService),
    Disabled,
}

impl PersonalizationService {
    pub fn cohere(api_key: String) -> Self {
        // If the API key is empty, consider the personalization service as disabled
        if api_key.trim().is_empty() {
            Self::disabled()
        } else {
            Self::Cohere(CohereService::new(api_key))
        }
    }

    pub fn disabled() -> Self {
        debug!("Personalization service disabled");
        Self::Disabled
    }

    pub async fn rerank_search_results(
        &self,
        search_result: SearchResult,
        personalize: &Personalize,
        query: Option<&str>,
        time_budget: TimeBudget,
    ) -> Result<SearchResult, ResponseError> {
        match self {
            Self::Cohere(cohere_service) => {
                cohere_service
                    .rerank_search_results(search_result, personalize, query, time_budget)
                    .await
            }
            Self::Disabled => Err(PersonalizationError::FeatureNotEnabled(
                index_scheduler::error::FeatureNotEnabledError {
                    disabled_action: "reranking search results",
                    feature: "personalization",
                    issue_link: "https://github.com/orgs/meilisearch/discussions/866",
                },
            )
            .into()),
        }
    }
}
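Editor's note: the startup wiring is not part of this hunk; a plausible sketch (the option name below is an assumption for illustration, not confirmed by this diff) is that an empty or absent key quietly degrades to `Disabled`, so personalized requests fail with `feature_not_enabled` rather than the server refusing to boot:

// Hypothetical startup wiring; `experimental_personalization_api_key`
// is an assumed option name.
let personalization_service = match opt.experimental_personalization_api_key.as_deref() {
    Some(key) => PersonalizationService::cohere(key.to_string()),
    None => PersonalizationService::disabled(),
};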

crates/meilisearch/src/routes/indexes/compact.rs — new file, 84 lines
@@ -0,0 +1,84 @@
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions;
use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use utoipa::OpenApi;

use super::ActionPolicy;
use crate::analytics::Analytics;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;

#[derive(OpenApi)]
#[openapi(
    paths(compact),
    tags(
        (
            name = "Compact an index",
            description = "The /compact route compacts the index database, reorganizing it to make it smaller and more efficient.",
            external_docs(url = "https://www.meilisearch.com/docs/reference/api/compact"),
        ),
    ),
)]
pub struct CompactApi;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::resource("").route(web::post().to(SeqHandler(compact))));
}

/// Compact an index
#[utoipa::path(
    post,
    path = "{indexUid}/compact",
    tag = "Compact an index",
    security(("Bearer" = ["search", "*"])),
    params(("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false)),
    responses(
        (status = ACCEPTED, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
            {
                "taskUid": 147,
                "indexUid": null,
                "status": "enqueued",
                "type": "documentDeletion",
                "enqueuedAt": "2024-08-08T17:05:55.791772Z"
            }
        )),
        (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
            {
                "message": "The Authorization header is missing. It must use the bearer authorization method.",
                "code": "missing_authorization_header",
                "type": "auth",
                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
            }
        )),
    )
)]
pub async fn compact(
    index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_COMPACT }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    req: HttpRequest,
    analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    analytics.publish(IndexCompacted::default(), &req);

    let task = KindWithContent::IndexCompaction { index_uid: index_uid.to_string() };
    let task =
        match tokio::task::spawn_blocking(move || index_scheduler.register(task, None, false))
            .await?
        {
            Ok(task) => task,
            Err(e) => return Err(e.into()),
        };

    debug!(returns = ?task, "Compact the {index_uid} index");
    Ok(HttpResponse::Accepted().json(SummarizedTaskView::from(task)))
}

crate::empty_analytics!(IndexCompacted, "Index Compacted");
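Editor's note: as a client-side usage sketch (the route itself is what the diff adds; the snippet below assumes a local instance on the default port and a placeholder key carrying the compact action):

// Enqueue a compaction task for the `movies` index and print the
// SummarizedTaskView returned with 202 Accepted.
#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let body = reqwest::Client::new()
        .post("http://localhost:7700/indexes/movies/compact")
        .bearer_auth("MASTER_KEY") // placeholder key
        .send()
        .await?
        .text()
        .await?;
    println!("{body}");
    Ok(())
}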
@@ -343,6 +343,7 @@ impl From<FacetSearchQuery> for SearchQuery {
             hybrid,
             ranking_score_threshold,
             locales,
+            personalize: None,
         }
     }
 }

@@ -28,6 +28,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
 use crate::routes::is_dry_run;
 use crate::Opt;
 
+pub mod compact;
 pub mod documents;
 mod enterprise_edition;
 pub mod facet_search;
@@ -49,8 +50,9 @@ pub use enterprise_edition::proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TAS
         (path = "/", api = facet_search::FacetSearchApi),
         (path = "/", api = similar::SimilarApi),
         (path = "/", api = settings::SettingsApi),
+        (path = "/", api = compact::CompactApi),
     ),
-    paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats),
+    paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats, compact::compact),
     tags(
         (
             name = "Indexes",
@@ -80,7 +82,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
             .service(web::scope("/search").configure(search::configure))
             .service(web::scope("/facet-search").configure(facet_search::configure))
             .service(web::scope("/similar").configure(similar::configure))
-            .service(web::scope("/settings").configure(settings::configure)),
+            .service(web::scope("/settings").configure(settings::configure))
+            .service(web::scope("/compact").configure(compact::configure)),
     );
 }

@@ -13,6 +13,7 @@ use meilisearch_types::serde_cs::vec::CS;
 use serde_json::Value;
 use tracing::debug;
 use utoipa::{IntoParams, OpenApi};
+use uuid::Uuid;
 
 use crate::analytics::Analytics;
 use crate::error::MeilisearchHttpError;
@@ -21,11 +22,12 @@ use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
 use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
+use crate::routes::parse_include_metadata_header;
 use crate::search::{
-    add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
-    RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
-    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
-    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
+    add_search_rules, perform_search, HybridQuery, MatchingStrategy, Personalize,
+    RankingScoreThreshold, RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult,
+    SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
+    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
 };
 use crate::search_queue::SearchQueue;

@@ -132,6 +134,8 @@ pub struct SearchQueryGet {
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
     #[param(value_type = Vec<Locale>, explode = false)]
     pub locales: Option<CS<Locale>>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchPersonalizeUserContext>)]
+    pub personalize_user_context: Option<String>,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
@@ -203,6 +207,9 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
             ));
         }
 
+        let personalize =
+            other.personalize_user_context.map(|user_context| Personalize { user_context });
+
         Ok(Self {
             q: other.q,
             // `media` not supported for `GET`
@@ -232,6 +239,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
             hybrid,
             ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
             locales: other.locales.map(|o| o.into_iter().collect()),
+            personalize,
         })
     }
 }
@@ -320,12 +328,14 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
 pub async fn search_with_url_query(
     index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
     search_queue: web::Data<SearchQueue>,
+    personalization_service: web::Data<crate::personalization::PersonalizationService>,
     index_uid: web::Path<String>,
     params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
     req: HttpRequest,
     analytics: web::Data<Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-    debug!(parameters = ?params, "Search get");
+    let request_uid = Uuid::now_v7();
+    debug!(request_uid = ?request_uid, parameters = ?params, "Search get");
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 
     let mut query: SearchQuery = params.into_inner().try_into()?;
@@ -339,31 +349,56 @@ pub async fn search_with_url_query(
 
     let index = index_scheduler.index(&index_uid)?;
 
+    // Extract personalization and query string before moving query
+    let personalize = query.personalize.take();
+
     let search_kind =
         search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
     let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
 
+    // Save the query string for personalization if requested
+    let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
+
     let permit = search_queue.try_get_search_permit().await?;
+    let include_metadata = parse_include_metadata_header(&req);
+
     let search_result = tokio::task::spawn_blocking(move || {
         perform_search(
-            index_uid.to_string(),
-            &index,
+            SearchParams {
+                index_uid: index_uid.to_string(),
                 query,
                 search_kind,
-            retrieve_vector,
-            index_scheduler.features(),
+                retrieve_vectors: retrieve_vector,
+                features: index_scheduler.features(),
+                request_uid,
+                include_metadata,
+            },
+            &index,
         )
     })
     .await;
     permit.drop().await;
     let search_result = search_result?;
-    if let Ok(ref search_result) = search_result {
+    if let Ok((search_result, _)) = search_result.as_ref() {
         aggregate.succeed(search_result);
     }
     analytics.publish(aggregate, &req);
 
-    let search_result = search_result?;
+    let (mut search_result, time_budget) = search_result?;
 
-    debug!(returns = ?search_result, "Search get");
+    // Apply personalization if requested
+    if let Some(personalize) = personalize.as_ref() {
+        search_result = personalization_service
+            .rerank_search_results(
+                search_result,
+                personalize,
+                personalize_query.as_deref(),
+                time_budget,
+            )
+            .await?;
+    }
+
+    debug!(request_uid = ?request_uid, returns = ?search_result, "Search get");
     Ok(HttpResponse::Ok().json(search_result))
 }

@@ -426,15 +461,17 @@ pub async fn search_with_url_query(
 pub async fn search_with_post(
     index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
     search_queue: web::Data<SearchQueue>,
+    personalization_service: web::Data<crate::personalization::PersonalizationService>,
     index_uid: web::Path<String>,
     params: AwebJson<SearchQuery, DeserrJsonError>,
     req: HttpRequest,
     analytics: web::Data<Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
+    let request_uid = Uuid::now_v7();
 
     let mut query = params.into_inner();
-    debug!(parameters = ?query, "Search post");
+    debug!(request_uid = ?request_uid, parameters = ?query, "Search post");
 
     // Tenant token search_rules.
     if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@@ -445,25 +482,37 @@ pub async fn search_with_post(
 
     let index = index_scheduler.index(&index_uid)?;
 
+    // Extract personalization and query string before moving query
+    let personalize = query.personalize.take();
+
     let search_kind =
         search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
     let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
 
+    let include_metadata = parse_include_metadata_header(&req);
+
+    // Save the query string for personalization if requested
+    let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
+
     let permit = search_queue.try_get_search_permit().await?;
     let search_result = tokio::task::spawn_blocking(move || {
         perform_search(
-            index_uid.to_string(),
-            &index,
+            SearchParams {
+                index_uid: index_uid.to_string(),
                 query,
                 search_kind,
                 retrieve_vectors,
-            index_scheduler.features(),
+                features: index_scheduler.features(),
+                request_uid,
+                include_metadata,
+            },
+            &index,
         )
     })
     .await;
     permit.drop().await;
     let search_result = search_result?;
-    if let Ok(ref search_result) = search_result {
+    if let Ok((ref search_result, _)) = search_result {
         aggregate.succeed(search_result);
         if search_result.degraded {
             MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
@@ -471,9 +520,21 @@ pub async fn search_with_post(
         }
     }
     analytics.publish(aggregate, &req);
 
-    let search_result = search_result?;
+    let (mut search_result, time_budget) = search_result?;
 
-    debug!(returns = ?search_result, "Search post");
+    // Apply personalization if requested
+    if let Some(personalize) = personalize.as_ref() {
+        search_result = personalization_service
+            .rerank_search_results(
+                search_result,
+                personalize,
+                personalize_query.as_deref(),
+                time_budget,
+            )
+            .await?;
+    }
+
+    debug!(request_uid = ?request_uid, returns = ?search_result, "Search post");
     Ok(HttpResponse::Ok().json(search_result))
 }

@@ -7,6 +7,7 @@ use serde_json::{json, Value};
 
 use crate::aggregate_methods;
 use crate::analytics::{Aggregate, AggregateMethod};
+use crate::metrics::MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS;
 use crate::search::{
     SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
     DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
@@ -95,6 +96,9 @@ pub struct SearchAggregator<Method: AggregateMethod> {
     show_ranking_score_details: bool,
     ranking_score_threshold: bool,
 
+    // personalization
+    total_personalized: usize,
+
     marker: std::marker::PhantomData<Method>,
 }
 
@@ -129,6 +133,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
             hybrid,
             ranking_score_threshold,
             locales,
+            personalize,
         } = query;
 
         let mut ret = Self::default();
@@ -204,6 +209,12 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
             ret.locales = locales.iter().copied().collect();
         }
 
+        // personalization
+        if personalize.is_some() {
+            ret.total_personalized = 1;
+            MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
+        }
+
         ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
         ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
         ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
@@ -234,6 +245,8 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
             facet_stats: _,
             degraded,
             used_negative_operator,
+            request_uid: _,
+            metadata: _,
         } = result;
 
         self.total_succeeded = self.total_succeeded.saturating_add(1);
@@ -294,6 +307,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
             total_used_negative_operator,
             ranking_score_threshold,
             mut locales,
+            total_personalized,
             marker: _,
         } = *new;
 
@@ -379,6 +393,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
         // locales
        self.locales.append(&mut locales);
 
+        // personalization
+        self.total_personalized = self.total_personalized.saturating_add(total_personalized);
+
        self
    }
 
@@ -424,6 +441,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
             total_used_negative_operator,
             ranking_score_threshold,
             locales,
+            total_personalized,
             marker: _,
         } = *self;
 
@@ -497,6 +515,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
                 "show_ranking_score_details": show_ranking_score_details,
                 "ranking_score_threshold": ranking_score_threshold,
             },
+            "personalization": {
+                "total_personalized": total_personalized,
+            },
         })
     }
 }

@@ -180,12 +180,6 @@ pub async fn get_metrics(
 
     let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");
 
-    // We cannot specify the version with ContentType(TEXT_PLAIN_UTF_8) so we have to write everything by hand :(
-    // see the following for what should be returned: https://prometheus.io/docs/instrumenting/content_negotiation/#content-type-response
-    let content_type = ("content-type", "text/plain; version=0.0.4; charset=utf-8");
-
-    Ok(HttpResponse::Ok()
-        // .insert_header(header::ContentType(mime::TEXT_PLAIN_UTF_8))
-        .insert_header(content_type)
-        .body(response))
+    let content_type = ("content-type", prometheus::TEXT_FORMAT);
+    Ok(HttpResponse::Ok().insert_header(content_type).body(response))
 }

@@ -41,10 +41,13 @@ use crate::routes::indexes::IndexView;
 use crate::routes::multi_search::SearchResults;
 use crate::routes::network::{Network, Remote};
 use crate::routes::swap_indexes::SwapIndexesPayload;
-use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetadata};
+use crate::routes::webhooks::{
+    WebhookResults, WebhookSettings, WebhookWithMetadataRedactedAuthorization,
+};
 use crate::search::{
     FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
     SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult,
+    INCLUDE_METADATA_HEADER,
 };
 use crate::search_queue::SearchQueue;
 use crate::Opt;
@@ -102,7 +105,7 @@ mod webhooks;
         url = "/",
         description = "Local server",
     )),
-    components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadata, meilisearch_types::milli::vector::VectorStoreBackend))
+    components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadataRedactedAuthorization, meilisearch_types::milli::vector::VectorStoreBackend))
 )]
 pub struct MeilisearchApi;
 
@@ -184,6 +187,18 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
         .is_some_and(|s| s.to_lowercase() == "true"))
 }
 
+/// Parse the `Meili-Include-Metadata` header from an HTTP request.
+///
+/// Returns `true` if the header is present and set to "true" or "1" (case-insensitive).
+/// Returns `false` if the header is not present or has any other value.
+pub fn parse_include_metadata_header(req: &HttpRequest) -> bool {
+    req.headers()
+        .get(INCLUDE_METADATA_HEADER)
+        .and_then(|h| h.to_str().ok())
+        .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1"))
+        .unwrap_or(false)
+}
 
 #[derive(Debug, Serialize, Deserialize, ToSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct SummarizedTaskView {

@@ -9,6 +9,7 @@ use meilisearch_types::keys::actions;
 use serde::Serialize;
 use tracing::debug;
 use utoipa::{OpenApi, ToSchema};
+use uuid::Uuid;
 
 use super::multi_search_analytics::MultiSearchAggregator;
 use crate::analytics::Analytics;
@@ -17,10 +18,11 @@ use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::{AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::routes::indexes::search::search_kind;
+use crate::routes::parse_include_metadata_header;
 use crate::search::{
     add_search_rules, perform_federated_search, perform_search, FederatedSearch,
-    FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
-    PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
+    FederatedSearchResult, RetrieveVectors, SearchParams, SearchQueryWithIndex,
+    SearchResultWithIndex, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
 };
 use crate::search_queue::SearchQueue;
 
@@ -144,6 +146,7 @@ pub struct SearchResults {
 pub async fn multi_search_with_post(
     index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
     search_queue: Data<SearchQueue>,
+    personalization_service: web::Data<crate::personalization::PersonalizationService>,
     params: AwebJson<FederatedSearch, DeserrJsonError>,
     req: HttpRequest,
     analytics: web::Data<Analytics>,
@@ -151,6 +154,7 @@ pub async fn multi_search_with_post(
     // Since we don't want to process half of the search requests and then get a permit refused
     // we're going to get one permit for the whole duration of the multi-search request.
     let permit = search_queue.try_get_search_permit().await?;
+    let request_uid = Uuid::now_v7();
 
     let federated_search = params.into_inner();
 
@@ -186,15 +190,30 @@ pub async fn multi_search_with_post(
             err
         })?;
 
+    let include_metadata = parse_include_metadata_header(&req);
     let response = match federation {
         Some(federation) => {
+            debug!(
+                request_uid = ?request_uid,
+                federation = ?federation,
+                parameters = ?queries,
+                "Federated-search"
+            );
+
             // check remote header
             let is_proxy = req
                 .headers()
                 .get(PROXY_SEARCH_HEADER)
                 .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes());
-            let search_result =
-                perform_federated_search(&index_scheduler, queries, federation, features, is_proxy)
+            let search_result = perform_federated_search(
+                &index_scheduler,
+                queries,
+                federation,
+                features,
+                is_proxy,
+                request_uid,
+                include_metadata,
+            )
             .await;
             permit.drop().await;
 
@@ -203,6 +222,13 @@ pub async fn multi_search_with_post(
             }
 
             analytics.publish(multi_aggregate, &req);
 
+            debug!(
+                request_uid = ?request_uid,
+                returns = ?search_result,
+                "Federated-search"
+            );
+
             HttpResponse::Ok().json(search_result?)
         }
         None => {
@@ -211,12 +237,17 @@ pub async fn multi_search_with_post(
             // changes.
             let search_results: Result<_, (ResponseError, usize)> = async {
                 let mut search_results = Vec::with_capacity(queries.len());
-                for (query_index, (index_uid, query, federation_options)) in queries
+                for (query_index, (index_uid, mut query, federation_options)) in queries
                     .into_iter()
                     .map(SearchQueryWithIndex::into_index_query_federation)
                     .enumerate()
                 {
-                    debug!(on_index = query_index, parameters = ?query, "Multi-search");
+                    debug!(
+                        request_uid = ?request_uid,
+                        on_index = query_index,
+                        parameters = ?query,
+                        "Multi-search"
+                    );
 
                     if federation_options.is_some() {
                         return Err((
@@ -239,6 +270,13 @@ pub async fn multi_search_with_post(
                     })
                     .with_index(query_index)?;
 
+                    // Extract personalization and query string before moving query
+                    let personalize = query.personalize.take();
+
+                    // Save the query string for personalization if requested
+                    let personalize_query =
+                        personalize.is_some().then(|| query.q.clone()).flatten();
+
                     let index_uid_str = index_uid.to_string();
 
                     let search_kind = search_kind(
@@ -250,22 +288,40 @@ pub async fn multi_search_with_post(
                     .with_index(query_index)?;
                     let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
 
-                    let search_result = tokio::task::spawn_blocking(move || {
+                    let (mut search_result, time_budget) = tokio::task::spawn_blocking(move || {
                         perform_search(
-                            index_uid_str.clone(),
-                            &index,
+                            SearchParams {
+                                index_uid: index_uid_str.clone(),
                                 query,
                                 search_kind,
-                            retrieve_vector,
+                                retrieve_vectors: retrieve_vector,
                                 features,
+                                request_uid,
+                                include_metadata,
+                            },
+                            &index,
                         )
                     })
                     .await
-                    .with_index(query_index)?
+                    .with_index(query_index)?;
 
+                    // Apply personalization if requested
+                    if let Some(personalize) = personalize.as_ref() {
+                        search_result = personalization_service
+                            .rerank_search_results(
+                                search_result,
+                                personalize,
+                                personalize_query.as_deref(),
+                                time_budget,
+                            )
+                            .await
+                            .with_index(query_index)?;
+                    }
+
                     search_results.push(SearchResultWithIndex {
                         index_uid: index_uid.into_inner(),
-                        result: search_result.with_index(query_index)?,
+                        result: search_result,
                     });
                 }
                 Ok(search_results)
@@ -286,7 +342,11 @@ pub async fn multi_search_with_post(
             err
         })?;
 
-        debug!(returns = ?search_results, "Multi-search");
+        debug!(
+            request_uid = ?request_uid,
+            returns = ?search_results,
+            "Multi-search"
+        );
 
         HttpResponse::Ok().json(SearchResults { results: search_results })
     }

@@ -67,6 +67,7 @@ impl MultiSearchAggregator {
             hybrid: _,
             ranking_score_threshold: _,
             locales: _,
+            personalize: _,
         } in &federated_search.queries
         {
             if let Some(federation_options) = federation_options {

@@ -226,14 +226,14 @@ mod tests {
         {
             let params = "types=createIndex";
             let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
-            snapshot!(meili_snap::json_string!(err), @r#"
+            snapshot!(meili_snap::json_string!(err), @r###"
             {
-              "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
+              "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
               "code": "invalid_task_types",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_types"
            }
-            "#);
+            "###);
        }
    }
    #[test]

@@ -1,3 +1,4 @@
+use core::convert::Infallible;
 use std::collections::BTreeMap;
 use std::str::FromStr;
 
@@ -7,7 +8,6 @@ use actix_http::header::{
 };
 use actix_web::web::{self, Data, Path};
 use actix_web::{HttpRequest, HttpResponse};
-use core::convert::Infallible;
 use deserr::actix_web::AwebJson;
 use deserr::{DeserializeError, Deserr, ValuePointerRef};
 use index_scheduler::IndexScheduler;
@@ -24,12 +24,12 @@ use tracing::debug;
 use url::Url;
 use utoipa::{OpenApi, ToSchema};
 use uuid::Uuid;
+use WebhooksError::*;
 
 use crate::analytics::{Aggregate, Analytics};
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
-use WebhooksError::*;
 
 #[derive(OpenApi)]
 #[openapi(
@@ -90,7 +90,7 @@ fn deny_immutable_fields_webhook(
 #[derive(Debug, Serialize, ToSchema)]
 #[serde(rename_all = "camelCase")]
 #[schema(rename_all = "camelCase")]
-pub(super) struct WebhookWithMetadata {
+pub(super) struct WebhookWithMetadataRedactedAuthorization {
     uuid: Uuid,
     is_editable: bool,
     #[schema(value_type = WebhookSettings)]
@@ -98,8 +98,9 @@ pub(super) struct WebhookWithMetadata {
     webhook: Webhook,
 }
 
-impl WebhookWithMetadata {
-    pub fn from(uuid: Uuid, webhook: Webhook) -> Self {
+impl WebhookWithMetadataRedactedAuthorization {
+    pub fn from(uuid: Uuid, mut webhook: Webhook) -> Self {
+        webhook.redact_authorization_header();
         Self { uuid, is_editable: uuid != Uuid::nil(), webhook }
    }
 }
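Editor's note: `redact_authorization_header` lives on `Webhook` in meilisearch-types and is not part of this hunk; a sketch of its presumed effect (assumed implementation for illustration, the real method may differ in detail):

// Assumed shape of the redaction.
impl Webhook {
    pub fn redact_authorization_header(&mut self) {
        // Hide the secret before the webhook is serialized back to API clients.
        if let Some(auth) = self.headers.get_mut("Authorization") {
            *auth = "XXX...".to_string();
        }
    }
}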
|
||||
@@ -107,7 +108,7 @@ impl WebhookWithMetadata {
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub(super) struct WebhookResults {
|
||||
results: Vec<WebhookWithMetadata>,
|
||||
results: Vec<WebhookWithMetadataRedactedAuthorization>,
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
@@ -150,7 +151,7 @@ async fn get_webhooks(
|
||||
let results = webhooks
|
||||
.webhooks
|
||||
.into_iter()
|
||||
.map(|(uuid, webhook)| WebhookWithMetadata::from(uuid, webhook))
|
||||
.map(|(uuid, webhook)| WebhookWithMetadataRedactedAuthorization::from(uuid, webhook))
|
||||
.collect::<Vec<_>>();
|
||||
let results = WebhookResults { results };
|
||||
|
||||
@@ -301,7 +302,7 @@ fn check_changed(uuid: Uuid, webhook: &Webhook) -> Result<(), WebhooksError> {
|
||||
tag = "Webhooks",
|
||||
security(("Bearer" = ["webhooks.get", "webhooks.*", "*.get", "*"])),
|
||||
responses(
|
||||
(status = 200, description = "Webhook found", body = WebhookWithMetadata, content_type = "application/json", example = json!({
|
||||
(status = 200, description = "Webhook found", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
|
||||
"uuid": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"url": "https://your.site/on-tasks-completed",
|
||||
"headers": {
|
||||
@@ -324,7 +325,7 @@ async fn get_webhook(
|
||||
let mut webhooks = index_scheduler.webhooks_view();
|
||||
|
||||
let webhook = webhooks.webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?;
|
||||
let webhook = WebhookWithMetadata::from(uuid, webhook);
|
||||
let webhook = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
|
||||
|
||||
debug!(returns = ?webhook, "Get webhook");
|
||||
Ok(HttpResponse::Ok().json(webhook))
|
||||
@@ -337,7 +338,7 @@ async fn get_webhook(
|
||||
request_body = WebhookSettings,
|
||||
security(("Bearer" = ["webhooks.create", "webhooks.*", "*"])),
|
||||
responses(
|
||||
(status = 201, description = "Webhook created successfully", body = WebhookWithMetadata, content_type = "application/json", example = json!({
|
||||
(status = 201, description = "Webhook created successfully", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
|
||||
"uuid": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"url": "https://your.site/on-tasks-completed",
|
||||
"headers": {
|
||||
@@ -383,7 +384,7 @@ async fn post_webhook(
|
||||
|
||||
analytics.publish(PostWebhooksAnalytics, &req);
|
||||
|
||||
let response = WebhookWithMetadata::from(uuid, webhook);
|
||||
let response = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
|
||||
debug!(returns = ?response, "Post webhook");
|
||||
Ok(HttpResponse::Created().json(response))
|
||||
}
|
||||
@@ -395,7 +396,7 @@ async fn post_webhook(
|
||||
request_body = WebhookSettings,
|
||||
security(("Bearer" = ["webhooks.update", "webhooks.*", "*"])),
|
||||
responses(
|
||||
(status = 200, description = "Webhook updated successfully", body = WebhookWithMetadata, content_type = "application/json", example = json!({
|
||||
(status = 200, description = "Webhook updated successfully", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
|
||||
"uuid": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"url": "https://your.site/on-tasks-completed",
|
||||
"headers": {
|
||||
@@ -435,7 +436,7 @@ async fn patch_webhook(
|
||||
|
||||
analytics.publish(PatchWebhooksAnalytics, &req);
|
||||
|
||||
let response = WebhookWithMetadata::from(uuid, webhook);
|
||||
let response = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
|
||||
debug!(returns = ?response, "Patch webhook");
|
||||
Ok(HttpResponse::Ok().json(response))
|
||||
}
|
||||
|
||||
@@ -17,11 +17,13 @@ use meilisearch_types::milli::vector::Embedding;
|
||||
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
|
||||
use roaring::RoaringBitmap;
|
||||
use tokio::task::JoinHandle;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::super::ranking_rules::{self, RankingRules};
|
||||
use super::super::{
|
||||
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, HitMaker,
|
||||
HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
|
||||
HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchMetadata, SearchQuery,
|
||||
SearchQueryWithIndex,
|
||||
};
|
||||
use super::proxy::{proxy_search, ProxySearchError, ProxySearchParams};
|
||||
use super::types::{
|
||||
@@ -39,32 +41,58 @@ pub async fn perform_federated_search(
|
||||
federation: Federation,
|
||||
features: RoFeatures,
|
||||
is_proxy: bool,
|
||||
request_uid: Uuid,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, ResponseError> {
|
||||
if is_proxy {
|
||||
features.check_network("Performing a remote federated search")?;
|
||||
}
|
||||
let before_search = std::time::Instant::now();
|
||||
let deadline = before_search + std::time::Duration::from_secs(9);
|
||||
|
||||
let timeout = std::env::var("MEILI_EXPERIMENTAL_REMOTE_SEARCH_TIMEOUT_SECONDS")
|
||||
.ok()
|
||||
.map(|p| p.parse().unwrap())
|
||||
.unwrap_or(25);
|
||||
|
||||
let deadline = before_search + std::time::Duration::from_secs(timeout);

    let required_hit_count = federation.limit + federation.offset;
    let retrieve_vectors = queries.iter().any(|q| q.retrieve_vectors);

    let network = index_scheduler.network();

+    // Preconstruct metadata keeping the original queries order for later metadata building
+    let precomputed_query_metadata: Option<Vec<_>> = include_metadata.then(|| {
+        queries
+            .iter()
+            .map(|q| {
+                (
+                    q.index_uid.to_string(),
+                    q.federation_options.as_ref().and_then(|o| o.remote.clone()),
+                )
+            })
+            .collect()
+    });
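
The metadata is captured here, before partitioning consumes and regroups `queries`, so the final response can report entries in the caller's original order. The `bool::then` and `bool::then_some` idioms used in this change differ only in evaluation; a short, runnable illustration:

    fn main() {
        let include_metadata = true;
        // `then` runs the closure lazily, only when the flag is true...
        let precomputed: Option<Vec<u32>> = include_metadata.then(|| (0..3).collect());
        // ...while `then_some` wraps an already-computed value.
        let errors: Option<&str> = include_metadata.then_some("remote_errors");
        assert_eq!(precomputed, Some(vec![0, 1, 2]));
        assert_eq!(errors, Some("remote_errors"));
    }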

    // this implementation partitions the queries by index to guarantee an important property:
    // - all the queries to a particular index use the same read transaction.
    // Without this property, we could not guarantee the self-consistency of the results.

    // 1. partition queries by host and index
    let mut partitioned_queries = PartitionedQueries::new();

    for (query_index, federated_query) in queries.into_iter().enumerate() {
        partitioned_queries.partition(federated_query, query_index, &network, features)?
    }

    // 2. perform queries, merge and make hits index by index
    // 2.1. start remote queries
-    let remote_search =
-        RemoteSearch::start(partitioned_queries.remote_queries_by_host, &federation, deadline);
+    let remote_search = RemoteSearch::start(
+        partitioned_queries.remote_queries_by_host,
+        &federation,
+        deadline,
+        include_metadata,
+    );

    // 2.2. concurrently execute local queries
    let params = SearchByIndexParams {
@@ -106,11 +134,25 @@ pub async fn perform_federated_search(
    let after_waiting_remote_results = std::time::Instant::now();

    // 3. merge hits and metadata across indexes and hosts
-    // 3.1. merge metadata
+
+    // 3.1. Build metadata in the same order as the original queries
+    let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| {
+        // If a remote is present, set the local remote name
+        let local_remote_name = network.local.clone().filter(|_| partitioned_queries.has_remote);
+
+        build_query_metadata(
+            precomputed_query_metadata,
+            local_remote_name,
+            &remote_results,
+            &results_by_index,
+        )
+    });
+
+    // 3.2. merge federation metadata
    let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) =
        merge_metadata(&mut results_by_index, &remote_results);

-    // 3.2. merge hits
+    // 3.3. merge hits
    let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
        .skip(federation.offset)
        .take(federation.limit)
@@ -125,7 +167,7 @@
        .map(|hit| hit.hit())
        .collect();

-    // 3.3. merge query vectors
+    // 3.4. merge query vectors
    let query_vectors = if retrieve_vectors {
        for remote_results in remote_results.iter_mut() {
            if let Some(remote_vectors) = remote_results.query_vectors.take() {
@@ -144,7 +186,7 @@
        None
    };

-    // 3.4. merge facets
+    // 3.5. merge facets
    let (facet_distribution, facet_stats, facets_by_index) =
        facet_order.merge(federation.merge_facets, remote_results, facets);

@@ -170,6 +212,8 @@
        facet_stats,
        facets_by_index,
        remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
+        request_uid: Some(request_uid),
+        metadata: query_metadata,
    })
}

@@ -219,7 +263,7 @@ struct SearchResultByQueryIterItem<'a> {

fn merge_index_local_results(
    results_by_query: Vec<SearchResultByQuery<'_>>,
-) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
+) -> impl Iterator<Item = SearchResultByQueryIterItem<'_>> + '_ {
    itertools::kmerge_by(
        results_by_query.into_iter().map(SearchResultByQueryIter::new),
        |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
@@ -393,6 +437,7 @@ struct SearchHitByIndex {

struct SearchResultByIndex {
    index: String,
+    primary_key: Option<String>,
    hits: Vec<SearchHitByIndex>,
    estimated_total_hits: usize,
    degraded: bool,
@@ -400,6 +445,61 @@ struct SearchResultByIndex {
    facets: Option<ComputedFacets>,
}

+/// Builds query metadata for federated search results.
+///
+/// This function creates metadata for each query in the same order as the original queries,
+/// combining information from both local and remote search results. It handles the mapping
+/// of primary keys to their respective indexes and remotes to prevent collisions when
+/// multiple remotes have the same index_uid but different primary keys.
+fn build_query_metadata(
+    precomputed_query_metadata: Vec<(String, Option<String>)>,
+    local_remote_name: Option<String>,
+    remote_results: &[FederatedSearchResult],
+    results_by_index: &[SearchResultByIndex],
+) -> Vec<SearchMetadata> {
+    // Create a map of (remote, index_uid) -> primary_key for quick lookup.
+    // This prevents collisions when multiple remotes have the same index_uid but different primary keys
+    let mut primary_key_per_index = std::collections::HashMap::new();
+
+    // Build metadata for remote results
+    for remote_result in remote_results {
+        if let Some(remote_metadata) = &remote_result.metadata {
+            for remote_meta in remote_metadata {
+                if let SearchMetadata {
+                    remote: Some(remote_name),
+                    index_uid,
+                    primary_key: Some(primary_key),
+                    ..
+                } = remote_meta
+                {
+                    let key = (Some(remote_name), index_uid);
+                    primary_key_per_index.insert(key, primary_key);
+                }
+            }
+        }
+    }
+
+    // Build metadata for local results
+    for local_meta in results_by_index {
+        if let SearchResultByIndex { index, primary_key: Some(primary_key), .. } = local_meta {
+            let key = (None, index);
+            primary_key_per_index.insert(key, primary_key);
+        }
+    }
+
+    // Build metadata in the same order as the original queries
+    let mut query_metadata = Vec::new();
+    for (index_uid, remote) in precomputed_query_metadata {
+        let primary_key =
+            primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string());
+        let query_uid = Uuid::now_v7();
+        // if the remote is not set, use the local remote name
+        let remote = remote.or_else(|| local_remote_name.clone());
+        query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote });
+    }
+    query_metadata
+}
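
The composite `(remote, index_uid)` key is what keeps a local index and a same-named remote index from clobbering each other's primary key. A self-contained sketch of the lookup:

    use std::collections::HashMap;

    fn main() {
        // (remote, index_uid) -> primary_key: a local `movies` index and a
        // remote `movies` index stay distinct even with different keys.
        let mut primary_key_per_index: HashMap<(Option<&str>, &str), &str> = HashMap::new();
        primary_key_per_index.insert((None, "movies"), "id");
        primary_key_per_index.insert((Some("ms-1"), "movies"), "uuid");

        assert_eq!(primary_key_per_index.get(&(None, "movies")), Some(&"id"));
        assert_eq!(primary_key_per_index.get(&(Some("ms-1"), "movies")), Some(&"uuid"));
    }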

fn merge_metadata(
    results_by_index: &mut Vec<SearchResultByIndex>,
    remote_results: &Vec<FederatedSearchResult>,
@@ -411,6 +511,7 @@ fn merge_metadata(
    let mut max_remote_duration = Duration::ZERO;
    for SearchResultByIndex {
        index,
+        primary_key: _,
        hits: _,
        estimated_total_hits: estimated_total_hits_by_index,
        facets: facets_by_index,
@@ -439,6 +540,8 @@ fn merge_metadata(
        degraded: degraded_for_host,
        used_negative_operator: host_used_negative_operator,
        remote_errors: _,
+        metadata: _,
+        request_uid: _,
    } in remote_results
    {
        let this_remote_duration = Duration::from_millis(*processing_time_ms as u64);
@@ -498,6 +601,10 @@ impl PartitionedQueries {
            .into());
        }

+        if federated_query.has_personalize() {
+            return Err(MeilisearchHttpError::PersonalizationInFederatedQuery(query_index).into());
+        }
+
        let (index_uid, query, federation_options) = federated_query.into_index_query_federation();

        let federation_options = federation_options.unwrap_or_default();
@@ -566,7 +673,12 @@ struct RemoteSearch {
}

impl RemoteSearch {
-    fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self {
+    fn start(
+        queries: RemoteQueriesByHost,
+        federation: &Federation,
+        deadline: Instant,
+        include_metadata: bool,
+    ) -> Self {
        let mut in_flight_remote_queries = BTreeMap::new();
        let client = reqwest::ClientBuilder::new()
            .connect_timeout(std::time::Duration::from_millis(200))
@@ -586,7 +698,10 @@ impl RemoteSearch {
            // never merge distant facets
            proxy_federation.merge_facets = None;
            let params = params.clone();
-            async move { proxy_search(&node, queries, proxy_federation, &params).await }
+            async move {
+                proxy_search(&node, queries, proxy_federation, &params, include_metadata)
+                    .await
+            }
            }),
        );
    }
@@ -630,6 +745,13 @@ impl RemoteSearch {
                continue 'remote_queries;
            }

+            // Add remote name to metadata
+            if let Some(metadata) = res.metadata.as_mut() {
+                for meta in metadata {
+                    meta.remote = Some(node_name.clone());
+                }
+            }
+
            federation.insert(
                FEDERATION_REMOTE.to_string(),
                serde_json::Value::String(node_name.clone()),
@@ -725,6 +847,7 @@ impl SearchByIndex {
            }
        };
        let rtxn = index.read_txn()?;
+        let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
        let criteria = index.criteria(&rtxn)?;
        let dictionary = index.dictionary(&rtxn)?;
        let dictionary: Option<Vec<_>> =
@@ -751,6 +874,12 @@ impl SearchByIndex {
            return Err(error);
        }
        let mut results_by_query = Vec::with_capacity(queries.len());
+
+        // all queries for an index share the same budget
+        let time_budget = match cutoff {
+            Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
+            None => TimeBudget::default(),
+        };
        for QueryByIndex { query, weight, query_index } in queries {
            // use an immediately invoked lambda to capture the result without returning from the function

@@ -820,17 +949,13 @@ impl SearchByIndex {

            let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);

-            let time_budget = match cutoff {
-                Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
-                None => TimeBudget::default(),
-            };
-
            let (mut search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search(
                &index,
                &rtxn,
                &query,
                &search_kind,
-                time_budget,
+                // clones of `TimeBudget` share the budget rather than restart it
+                time_budget.clone(),
                params.features,
            )?;
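
Hoisting the budget out of the per-query loop means every query against one index now races the same cutoff, and the new comment is the key detail: cloning a `TimeBudget` shares the deadline instead of restarting it. An illustrative stand-in (not the milli type) for why the clone can be passed into `prepare_search` repeatedly:

    use std::time::{Duration, Instant};

    // Clones copy the same fixed deadline, so they share the budget rather
    // than granting each query a fresh one.
    #[derive(Clone, Copy)]
    struct SharedBudget {
        deadline: Instant,
    }

    impl SharedBudget {
        fn new(budget: Duration) -> Self {
            Self { deadline: Instant::now() + budget }
        }

        fn exceeded(&self) -> bool {
            Instant::now() >= self.deadline
        }
    }

    fn main() {
        let budget = SharedBudget::new(Duration::from_millis(150));
        let clone = budget; // same deadline, not a restarted one
        assert_eq!(budget.exceeded(), clone.exceeded());
    }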

@@ -977,6 +1102,7 @@ impl SearchByIndex {
            })?;
        self.results_by_index.push(SearchResultByIndex {
            index: index_uid,
+            primary_key,
            hits: merged_result,
            estimated_total_hits,
            degraded,

@@ -7,7 +7,7 @@ use serde::de::DeserializeOwned;
use serde_json::Value;

use super::types::{FederatedSearch, FederatedSearchResult, Federation};
-use crate::search::SearchQueryWithIndex;
+use crate::search::{SearchQueryWithIndex, INCLUDE_METADATA_HEADER};

pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
@@ -98,6 +98,7 @@ pub async fn proxy_search(
    queries: Vec<SearchQueryWithIndex>,
    federation: Federation,
    params: &ProxySearchParams,
+    include_metadata: bool,
) -> Result<FederatedSearchResult, ProxySearchError> {
    let url = format!("{}/multi-search", node.url);

@@ -105,7 +106,12 @@ pub async fn proxy_search(

    let search_api_key = node.search_api_key.as_deref();

-    let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
+    let timeout = std::env::var("MEILI_EXPERIMENTAL_REMOTE_SEARCH_TIMEOUT_SECONDS")
+        .ok()
+        .map(|p| p.parse().unwrap())
+        .unwrap_or(25);
+
+    let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout);

    let deadline = if let Some(deadline) = params.deadline {
        std::time::Instant::min(deadline, max_deadline)
@@ -114,7 +120,16 @@ pub async fn proxy_search(
    };

    for i in 0..params.try_count {
-        match try_proxy_search(&url, search_api_key, &federated, &params.client, deadline).await {
+        match try_proxy_search(
+            &url,
+            search_api_key,
+            &federated,
+            &params.client,
+            deadline,
+            include_metadata,
+        )
+        .await
+        {
            Ok(response) => return Ok(response),
            Err(retry) => {
                let duration = retry.into_duration(i)?;
@@ -122,7 +137,7 @@ pub async fn proxy_search(
            }
        }
    }
-    try_proxy_search(&url, search_api_key, &federated, &params.client, deadline)
+    try_proxy_search(&url, search_api_key, &federated, &params.client, deadline, include_metadata)
        .await
        .map_err(Retry::into_error)
}
@@ -133,6 +148,7 @@ async fn try_proxy_search(
    federated: &FederatedSearch,
    client: &Client,
    deadline: std::time::Instant,
+    include_metadata: bool,
) -> Result<FederatedSearchResult, Retry> {
    let timeout = deadline.saturating_duration_since(std::time::Instant::now());

@@ -143,6 +159,8 @@ async fn try_proxy_search(
        request
    };
    let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
+    let request =
+        if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request };

    let response = request.send().await;
    let response = match response {
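
The proxy opts the downstream node into returning metadata by conditionally attaching the `Meili-Include-Metadata` header to the outgoing request. The builder-threading pattern in isolation (a sketch assuming only `reqwest`):

    use reqwest::RequestBuilder;

    // Attach the header only when the caller asked for metadata; otherwise
    // pass the builder through untouched.
    fn with_metadata_header(request: RequestBuilder, include_metadata: bool) -> RequestBuilder {
        if include_metadata {
            request.header("Meili-Include-Metadata", "true")
        } else {
            request
        }
    }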

@@ -16,6 +16,9 @@ use meilisearch_types::milli::order_by_map::OrderByMap;
use meilisearch_types::milli::OrderBy;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
+use uuid::Uuid;
+
+use crate::search::SearchMetadata;

use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
use crate::milli::vector::Embedding;
@@ -131,6 +134,10 @@ pub struct FederatedSearchResult {
    pub facet_stats: Option<BTreeMap<String, FacetStats>>,
    #[serde(default, skip_serializing_if = "FederatedFacets::is_empty")]
    pub facets_by_index: FederatedFacets,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub request_uid: Option<Uuid>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<Vec<SearchMetadata>>,

    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub remote_errors: Option<BTreeMap<String, ResponseError>>,
@@ -156,6 +163,8 @@ impl fmt::Debug for FederatedSearchResult {
        facet_stats,
        facets_by_index,
        remote_errors,
+        request_uid,
+        metadata,
    } = self;

    let mut debug = f.debug_struct("SearchResult");
@@ -188,6 +197,12 @@ impl fmt::Debug for FederatedSearchResult {
    if let Some(remote_errors) = remote_errors {
        debug.field("remote_errors", &remote_errors);
    }
+    if let Some(request_uid) = request_uid {
+        debug.field("request_uid", &request_uid);
+    }
+    if let Some(metadata) = metadata {
+        debug.field("metadata", &metadata);
+    }

    debug.finish()
}

@@ -36,6 +36,7 @@ use serde_json::{json, Value};
#[cfg(test)]
mod mod_test;
use utoipa::ToSchema;
+use uuid::Uuid;

use crate::error::MeilisearchHttpError;

@@ -56,6 +57,14 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
+pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";
+
+#[derive(Clone, Default, PartialEq, Deserr, ToSchema, Debug)]
+#[deserr(error = DeserrJsonError<InvalidSearchPersonalize>, rename_all = camelCase, deny_unknown_fields)]
+pub struct Personalize {
+    #[deserr(error = DeserrJsonError<InvalidSearchPersonalizeUserContext>)]
+    pub user_context: String,
+}

#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
@@ -120,6 +129,8 @@ pub struct SearchQuery {
    pub ranking_score_threshold: Option<RankingScoreThreshold>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>)]
    pub locales: Option<Vec<Locale>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
+    pub personalize: Option<Personalize>,
}

impl From<SearchParameters> for SearchQuery {
@@ -167,6 +178,7 @@ impl From<SearchParameters> for SearchQuery {
            highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
            crop_marker: DEFAULT_CROP_MARKER(),
            locales: None,
+            personalize: None,
        }
    }
}
@@ -248,6 +260,7 @@ impl fmt::Debug for SearchQuery {
            attributes_to_search_on,
            ranking_score_threshold,
            locales,
+            personalize,
        } = self;

        let mut debug = f.debug_struct("SearchQuery");
@@ -336,6 +349,10 @@ impl fmt::Debug for SearchQuery {
            debug.field("locales", &locales);
        }

+        if let Some(personalize) = personalize {
+            debug.field("personalize", &personalize);
+        }
+
        debug.finish()
    }
}
@@ -541,6 +558,9 @@ pub struct SearchQueryWithIndex {
    pub ranking_score_threshold: Option<RankingScoreThreshold>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
    pub locales: Option<Vec<Locale>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
+    #[serde(skip)]
+    pub personalize: Option<Personalize>,

    #[deserr(default)]
    pub federation_options: Option<FederationOptions>,
@@ -565,6 +585,10 @@ impl SearchQueryWithIndex {
        self.facets.as_deref().filter(|v| !v.is_empty())
    }

+    pub fn has_personalize(&self) -> bool {
+        self.personalize.is_some()
+    }
+
    pub fn from_index_query_federation(
        index_uid: IndexUid,
        query: SearchQuery,
@@ -598,6 +622,7 @@ impl SearchQueryWithIndex {
            attributes_to_search_on,
            ranking_score_threshold,
            locales,
+            personalize,
        } = query;

        SearchQueryWithIndex {
@@ -629,6 +654,7 @@ impl SearchQueryWithIndex {
            attributes_to_search_on,
            ranking_score_threshold,
            locales,
+            personalize,
            federation_options,
        }
    }
@@ -664,6 +690,7 @@ impl SearchQueryWithIndex {
            hybrid,
            ranking_score_threshold,
            locales,
+            personalize,
        } = self;
        (
            index_uid,
@@ -695,6 +722,7 @@ impl SearchQueryWithIndex {
            hybrid,
            ranking_score_threshold,
            locales,
+            personalize,
            // do not use ..Default::default() here,
            // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
        },
@@ -835,6 +863,18 @@ pub struct SearchHit {
    pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
}

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "camelCase")]
+#[schema(rename_all = "camelCase")]
+pub struct SearchMetadata {
+    pub query_uid: Uuid,
+    pub index_uid: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub primary_key: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub remote: Option<String>,
+}
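
Given the derives above, a metadata entry for a local query serializes with camelCased keys and drops the absent `remote` field. A standalone mirror of the shape, only to demonstrate the serde behavior (not the actual Meilisearch type; `Uuid::now_v7` needs the uuid crate's `v7` feature):

    use serde::Serialize;
    use uuid::Uuid;

    #[derive(Serialize)]
    #[serde(rename_all = "camelCase")]
    struct Meta {
        query_uid: Uuid,
        index_uid: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        primary_key: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        remote: Option<String>,
    }

    fn main() {
        let meta = Meta {
            query_uid: Uuid::now_v7(),
            index_uid: "movies".into(),
            primary_key: Some("id".into()),
            remote: None,
        };
        let json = serde_json::to_value(&meta).unwrap();
        assert!(json.get("queryUid").is_some()); // camelCased
        assert!(json.get("remote").is_none()); // skipped when None
    }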

#[derive(Serialize, Clone, PartialEq, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
@@ -851,6 +891,10 @@ pub struct SearchResult {
    pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub facet_stats: Option<BTreeMap<String, FacetStats>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub request_uid: Option<Uuid>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<SearchMetadata>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub semantic_hit_count: Option<u32>,
@@ -872,6 +916,8 @@ impl fmt::Debug for SearchResult {
        hits_info,
        facet_distribution,
        facet_stats,
+        request_uid,
+        metadata,
        semantic_hit_count,
        degraded,
        used_negative_operator,
@@ -901,6 +947,12 @@ impl fmt::Debug for SearchResult {
    if let Some(semantic_hit_count) = semantic_hit_count {
        debug.field("semantic_hit_count", &semantic_hit_count);
    }
+    if let Some(request_uid) = request_uid {
+        debug.field("request_uid", &request_uid);
+    }
+    if let Some(metadata) = metadata {
+        debug.field("metadata", &metadata);
+    }

    debug.finish()
}
@@ -1113,15 +1165,31 @@ pub fn prepare_search<'t>(
    Ok((search, is_finite_pagination, max_total_hits, offset))
}

+pub struct SearchParams {
+    pub index_uid: String,
+    pub query: SearchQuery,
+    pub search_kind: SearchKind,
+    pub retrieve_vectors: RetrieveVectors,
+    pub features: RoFeatures,
+    pub request_uid: Uuid,
+    pub include_metadata: bool,
+}
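
Folding the argument list into a parameter struct keeps call sites legible and lets flags like `include_metadata` ride along without another signature change. A hedged sketch of a call site under the new shape (values are illustrative, not taken from the actual route handlers):

    // Inside a handler that already has `query`, `search_kind`,
    // `retrieve_vectors`, `features`, and `index` in scope:
    let params = SearchParams {
        index_uid: "movies".to_string(),
        query,
        search_kind,
        retrieve_vectors,
        features,
        request_uid: Uuid::now_v7(),
        include_metadata: true,
    };
    let (result, time_budget) = perform_search(params, &index)?;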

pub fn perform_search(
-    index_uid: String,
+    params: SearchParams,
    index: &Index,
-    query: SearchQuery,
-    search_kind: SearchKind,
-    retrieve_vectors: RetrieveVectors,
-    features: RoFeatures,
-) -> Result<SearchResult, ResponseError> {
+) -> Result<(SearchResult, TimeBudget), ResponseError> {
+    let SearchParams {
+        index_uid,
+        query,
+        search_kind,
+        retrieve_vectors,
+        features,
+        request_uid,
+        include_metadata,
+    } = params;
    let before_search = Instant::now();
+    let index_uid_for_metadata = index_uid.clone();
    let rtxn = index.read_txn()?;
    let time_budget = match index.search_cutoff(&rtxn)? {
        Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
@@ -1129,7 +1197,7 @@ pub fn perform_search(
    };

    let (search, is_finite_pagination, max_total_hits, offset) =
-        prepare_search(index, &rtxn, &query, &search_kind, time_budget, features)?;
+        prepare_search(index, &rtxn, &query, &search_kind, time_budget.clone(), features)?;

    let (
        milli::SearchResult {
@@ -1142,7 +1210,20 @@ pub fn perform_search(
            query_vector,
        },
        semantic_hit_count,
-    ) = search_from_kind(index_uid, search_kind, search)?;
+    ) = search_from_kind(index_uid.clone(), search_kind, search)?;
+
+    let metadata = if include_metadata {
+        let query_uid = Uuid::now_v7();
+        let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
+        Some(SearchMetadata {
+            query_uid,
+            index_uid: index_uid_for_metadata,
+            primary_key,
+            remote: None, // Local searches don't have a remote
+        })
+    } else {
+        None
+    };

    let SearchQuery {
        q,
@@ -1174,6 +1255,7 @@ pub fn perform_search(
        attributes_to_search_on: _,
        filter: _,
        distinct: _,
+        personalize: _,
    } = query;

    let format = AttributesFormat {
@@ -1225,7 +1307,6 @@ pub fn perform_search(
        .transpose()?
        .map(|ComputedFacets { distribution, stats }| (distribution, stats))
        .unzip();
-
    let result = SearchResult {
        hits: documents,
        hits_info,
@@ -1237,8 +1318,10 @@ pub fn perform_search(
        degraded,
        used_negative_operator,
        semantic_hit_count,
+        request_uid: Some(request_uid),
+        metadata,
    };
-    Ok(result)
+    Ok((result, time_budget))
}

#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
@@ -2080,7 +2163,7 @@ pub(crate) fn parse_filter(
    facets: &Value,
    filter_parsing_error_code: Code,
    features: RoFeatures,
-) -> Result<Option<Filter>, ResponseError> {
+) -> Result<Option<Filter<'_>>, ResponseError> {
    let filter = match facets {
        Value::String(expr) => Filter::from_str(expr).map_err(|e| e.into()),
        Value::Array(arr) => parse_filter_array(arr).map_err(|e| e.into()),
@@ -2117,7 +2200,7 @@ pub(crate) fn parse_filter(
    Ok(filter)
}

-fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> {
+fn parse_filter_array(arr: &'_ [Value]) -> Result<Option<Filter<'_>>, MeilisearchHttpError> {
    let mut ands = Vec::new();
    for value in arr {
        match value {

@@ -419,14 +419,14 @@ async fn error_add_api_key_invalid_parameters_actions() {
    let (response, code) = server.add_api_key(content).await;

    meili_snap::snapshot!(code, @"400 Bad Request");
-    meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r#"
+    meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
    {
-      "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`",
+      "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`, `indexes.compact`",
      "code": "invalid_api_key_actions",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
    }
-    "#);
+    "###);
}

#[actix_rt::test]

@@ -91,14 +91,14 @@ async fn create_api_key_bad_actions() {
    // can't parse
    let (response, code) = server.add_api_key(json!({ "actions": ["doggo"] })).await;
    snapshot!(code, @"400 Bad Request");
-    snapshot!(json_string!(response), @r#"
+    snapshot!(json_string!(response), @r###"
    {
-      "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`",
+      "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`, `indexes.compact`",
      "code": "invalid_api_key_actions",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
    }
-    "#);
+    "###);
}

#[actix_rt::test]

@@ -1040,7 +1040,7 @@ async fn error_single_search_forbidden_token() {
    ];

    let failed_query_indexes: Vec<_> =
-        std::iter::repeat(Some(0)).take(5).chain(std::iter::repeat(None).take(6)).collect();
+        std::iter::repeat_n(Some(0), 5).chain(std::iter::repeat_n(None, 6)).collect();
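
`std::iter::repeat_n(x, n)`, stabilized in Rust 1.82, says "n copies of x" directly and replaces the older `repeat(x).take(n)` chain; both forms produce the same sequence:

    fn main() {
        let old: Vec<_> = std::iter::repeat(Some(0)).take(3).collect();
        let new: Vec<_> = std::iter::repeat_n(Some(0), 3).collect();
        assert_eq!(old, new);
    }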

    let failed_query_indexes = vec![failed_query_indexes; ACCEPTED_KEYS_SINGLE.len()];

@@ -1118,10 +1118,9 @@ async fn error_multi_search_forbidden_token() {
        },
    ];

-    let failed_query_indexes: Vec<_> = std::iter::repeat(Some(0))
-        .take(5)
-        .chain(std::iter::repeat(Some(1)).take(5))
-        .chain(std::iter::repeat(None).take(6))
+    let failed_query_indexes: Vec<_> = std::iter::repeat_n(Some(0), 5)
+        .chain(std::iter::repeat_n(Some(1), 5))
+        .chain(std::iter::repeat_n(None, 6))
        .collect();

    let failed_query_indexes = vec![failed_query_indexes; ACCEPTED_KEYS_BOTH.len()];

@@ -40,14 +40,14 @@ async fn batch_bad_types() {

    let (response, code) = server.batches_filter("types=doggo").await;
    snapshot!(code, @"400 Bad Request");
-    snapshot!(json_string!(response), @r#"
+    snapshot!(json_string!(response), @r###"
    {
-      "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
+      "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
      "code": "invalid_task_types",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_task_types"
    }
-    "#);
+    "###);
}

#[actix_rt::test]

@@ -516,6 +516,18 @@ impl<State> Index<'_, State> {
        self.service.post_encoded(url, query, self.encoder).await
    }

+    pub async fn search_with_headers(
+        &self,
+        query: Value,
+        headers: Vec<(&str, &str)>,
+    ) -> (Value, StatusCode) {
+        let url = format!("/indexes/{}/search", urlencode(self.uid.as_ref()));
+        let body = serde_json::to_string(&query).unwrap();
+        let mut all_headers = vec![("content-type", "application/json")];
+        all_headers.extend(headers);
+        self.service.post_str(url, body, all_headers).await
+    }
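
This helper lets a test attach arbitrary headers to a search request on top of the JSON content type. A hypothetical use, opting into the experimental metadata response (the query and the asserted field are illustrative, assuming the route honors the header):

    // Inside an #[actix_rt::test] with a seeded `index`:
    let (response, code) = index
        .search_with_headers(json!({ "q": "glass" }), vec![("Meili-Include-Metadata", "true")])
        .await;
    snapshot!(code, @"200 OK");
    assert!(response.get("metadata").is_some());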

    pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
        let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query);
        self.service.get(url).await

@@ -522,6 +522,26 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
    .await
}

+pub async fn shared_index_geojson_documents() -> &'static Index<'static, Shared> {
+    static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
+    INDEX
+        .get_or_init(|| async {
+            // Retrieved from https://gitlab-forge.din.developpement-durable.gouv.fr/pub/geomatique/descartes/d-map/-/blob/main/demo/examples/commons/countries.geojson?ref_type=heads
+            let server = Server::new_shared();
+            let index = server._index("SHARED_GEOJSON_DOCUMENTS").to_shared();
+            let countries = include_str!("../documents/geojson/assets/countries.json");
+            let lille = serde_json::from_str::<serde_json::Value>(countries).unwrap();
+            let (response, _code) = index._add_documents(Value(lille), Some("name")).await;
+            server.wait_task(response.uid()).await.succeeded();
+
+            let (response, _code) =
+                index._update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
+            server.wait_task(response.uid()).await.succeeded();
+            index
+        })
+        .await
+}
+
pub async fn shared_index_for_fragments() -> Index<'static, Shared> {
    static INDEX: OnceCell<(Server<Shared>, String)> = OnceCell::const_new();
    let (server, uid) = INDEX

@@ -49,8 +49,8 @@ impl Server<Owned> {
    }

    let options = default_settings(dir.path());
-
-    let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
+    let handle = tokio::runtime::Handle::current();
+    let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
    let service = Service { index_scheduler, auth, options, api_key: None };

    Server { service, _dir: Some(dir), _marker: PhantomData }
@@ -65,7 +65,9 @@ impl Server<Owned> {

    options.master_key = Some("MASTER_KEY".to_string());

-    let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
+    let handle = tokio::runtime::Handle::current();
+
+    let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
    let service = Service { index_scheduler, auth, options, api_key: None };

    Server { service, _dir: Some(dir), _marker: PhantomData }
@@ -78,7 +80,9 @@ impl Server<Owned> {
    }

    pub async fn new_with_options(options: Opt) -> Result<Self, anyhow::Error> {
-        let (index_scheduler, auth) = setup_meilisearch(&options)?;
+        let handle = tokio::runtime::Handle::current();
+
+        let (index_scheduler, auth) = setup_meilisearch(&options, handle)?;
        let service = Service { index_scheduler, auth, options, api_key: None };

        Ok(Server { service, _dir: None, _marker: PhantomData })
@@ -217,8 +221,9 @@ impl Server<Shared> {
    }

    let options = default_settings(dir.path());
+    let handle = tokio::runtime::Handle::current();

-    let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
+    let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
    let service = Service { index_scheduler, auth, api_key: None, options };

    Server { service, _dir: Some(dir), _marker: PhantomData }
@@ -390,6 +395,17 @@ impl<State> Server<State> {
        self.service.post("/multi-search", queries).await
    }

+    pub async fn multi_search_with_headers(
+        &self,
+        queries: Value,
+        headers: Vec<(&str, &str)>,
+    ) -> (Value, StatusCode) {
+        let body = serde_json::to_string(&queries).unwrap();
+        let mut all_headers = vec![("content-type", "application/json")];
+        all_headers.extend(headers);
+        self.service.post_str("/multi-search", body, all_headers).await
+    }
+
    pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
        self.service.get(format!("/indexes{parameters}")).await
    }
@@ -490,6 +506,8 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
        max_indexing_threads: MaxThreads::from_str("2").unwrap(),
        experimental_no_edition_2024_for_settings: false,
        experimental_no_edition_2024_for_dumps: false,
+        experimental_no_edition_2024_for_prefix_post_processing: false,
+        experimental_no_edition_2024_for_facet_post_processing: false,
    },
    experimental_enable_metrics: false,
    ..Parser::parse_from(None as Option<&str>)

@@ -10,8 +10,9 @@ use actix_web::test::TestRequest;
use actix_web::web::Data;
use index_scheduler::IndexScheduler;
use meilisearch::analytics::Analytics;
+use meilisearch::personalization::PersonalizationService;
use meilisearch::search_queue::SearchQueue;
-use meilisearch::{create_app, Opt, SubscriberForSecondLayer};
+use meilisearch::{create_app, Opt, ServicesData, SubscriberForSecondLayer};
use meilisearch_auth::AuthController;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
@@ -135,14 +136,24 @@ impl Service {
        self.options.experimental_search_queue_size,
        NonZeroUsize::new(1).unwrap(),
    );
+    let personalization_service = self
+        .options
+        .experimental_personalization_api_key
+        .clone()
+        .map(PersonalizationService::cohere)
+        .unwrap_or_else(PersonalizationService::disabled);

    actix_web::test::init_service(create_app(
-        self.index_scheduler.clone().into(),
-        self.auth.clone().into(),
-        Data::new(search_queue),
+        ServicesData {
+            index_scheduler: self.index_scheduler.clone().into(),
+            auth: self.auth.clone().into(),
+            search_queue: Data::new(search_queue),
+            personalization_service: Data::new(personalization_service),
+            logs_route_handle: Data::new(route_layer_handle),
+            logs_stderr_handle: Data::new(stderr_layer_handle),
+            analytics: Data::new(Analytics::no_analytics()),
+        },
        self.options.clone(),
-        (route_layer_handle, stderr_layer_handle),
-        Data::new(Analytics::no_analytics()),
        true,
    ))
    .await

@@ -1,6 +1,3 @@
-use crate::common::encoder::Encoder;
-use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
-use crate::json;
use actix_web::test;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
@@ -8,6 +5,10 @@ use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use uuid::Uuid;

+use crate::common::encoder::Encoder;
+use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
+use crate::json;
+
/// This is the basic usage of our API and every other test uses the content-type application/json
#[actix_rt::test]
async fn add_documents_test_json_content_types() {
@@ -1852,7 +1853,7 @@ async fn add_documents_with_geo_field() {
        .await;
    snapshot!(code, @"200 OK");
    // we are expecting docs 4 and 3 first as they have geo
-    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]", ".requestUid" => "[uuid]" }),
        @r###"
    {
      "hits": [
@@ -1884,7 +1885,8 @@ async fn add_documents_with_geo_field() {
      "processingTimeMs": "[time]",
      "limit": 20,
      "offset": 0,
-      "estimatedTotalHits": 4
+      "estimatedTotalHits": 4,
+      "requestUid": "[uuid]"
    }
    "###);
}
@@ -1939,7 +1941,7 @@ async fn update_documents_with_geo_field() {
    let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await;
    snapshot!(code, @"200 OK");
    // we are expecting docs 4 and 3 first as they have geo
-    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]", ".requestUid" => "[uuid]" }),
        @r###"
    {
      "hits": [
@@ -1971,7 +1973,8 @@ async fn update_documents_with_geo_field() {
      "processingTimeMs": "[time]",
      "limit": 20,
      "offset": 0,
-      "estimatedTotalHits": 4
+      "estimatedTotalHits": 4,
+      "requestUid": "[uuid]"
    }
    "###);

@@ -2043,7 +2046,7 @@ async fn update_documents_with_geo_field() {
    let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await;
    snapshot!(code, @"200 OK");
    // the search response should not have changed: we are expecting docs 4 and 3 first as they have geo
-    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]", ".requestUid" => "[uuid]" }),
        @r###"
    {
      "hits": [
@@ -2076,7 +2079,8 @@ async fn update_documents_with_geo_field() {
      "processingTimeMs": "[time]",
      "limit": 20,
      "offset": 0,
-      "estimatedTotalHits": 4
+      "estimatedTotalHits": 4,
+      "requestUid": "[uuid]"
    }
    "###);
}

@@ -134,14 +134,14 @@ async fn get_all_documents_bad_filter() {

    let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
    snapshot!(code, @"400 Bad Request");
-    snapshot!(json_string!(response), @r###"
+    snapshot!(json_string!(response), @r#"
    {
-      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
+      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `doggo`.\n1:6 doggo",
      "code": "invalid_document_filter",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
    }
-    "###);
+    "#);

    let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
    snapshot!(code, @"400 Bad Request");

@@ -523,14 +523,14 @@ async fn delete_document_by_filter() {
    // send bad filter
    let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await;
    snapshot!(code, @"400 Bad Request");
-    snapshot!(response, @r###"
+    snapshot!(response, @r#"
    {
-      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
+      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `hello`.\n1:6 hello",
      "code": "invalid_document_filter",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
    }
-    "###);
+    "#);

    // send empty filter
    let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await;

@@ -724,14 +724,14 @@ async fn fetch_document_by_filter() {

    let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await;
    snapshot!(code, @"400 Bad Request");
-    snapshot!(response, @r###"
+    snapshot!(response, @r#"
    {
-      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
+      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `cool doggo`.\n1:11 cool doggo",
      "code": "invalid_document_filter",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
    }
-    "###);
+    "#);

    let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await;
    snapshot!(code, @"400 Bad Request");

crates/meilisearch/tests/documents/geojson/assets/countries.json (new file, 180 lines; diff suppressed because one or more lines are too long)
Some files were not shown because too many files have changed in this diff.