mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-22 04:36:32 +00:00
Compare commits
307 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ea70a7d1c9 | ||
|
|
9304f8e586 | ||
|
|
495db080ec | ||
|
|
d71341fa48 | ||
|
|
5b3070d8c3 | ||
|
|
89006fd4b3 | ||
|
|
49f50a0a21 | ||
|
|
1104f00803 | ||
|
|
33fa564a9c | ||
|
|
a097b254f8 | ||
|
|
54cb0ec437 | ||
|
|
38ed1f1dbb | ||
|
|
643dd33358 | ||
|
|
32f9fb6ab2 | ||
|
|
b5966f82e8 | ||
|
|
5e54063aab | ||
|
|
40456795d0 | ||
|
|
40e60c6f52 | ||
|
|
eeae6383d0 | ||
|
|
8cbcaeff56 | ||
|
|
ce87d5a89e | ||
|
|
9f7172f6ab | ||
|
|
d6eca83cfa | ||
|
|
a9d6e86077 | ||
|
|
346f9efe3a | ||
|
|
a987d698c1 | ||
|
|
fc3508c8c8 | ||
|
|
dbb45dec1a | ||
|
|
5f69a43846 | ||
|
|
fe1e4814fa | ||
|
|
c29749741b | ||
|
|
3e47201365 | ||
|
|
ec9719f3b1 | ||
|
|
b2cc9e4db8 | ||
|
|
56198bae48 | ||
|
|
888059b2d0 | ||
|
|
410f2fc8c3 | ||
|
|
54e244d2f3 | ||
|
|
e0c36972fb | ||
|
|
daadcddb5e | ||
|
|
7f92dafa02 | ||
|
|
cc5d12a368 | ||
|
|
0f98b996b5 | ||
|
|
d005ca5bf7 | ||
|
|
7e65fb1d3e | ||
|
|
cdefb3f665 | ||
|
|
a91887221a | ||
|
|
9c66b20a97 | ||
|
|
a48283527e | ||
|
|
73f78c19b0 | ||
|
|
34639e346e | ||
|
|
7af2a254d6 | ||
|
|
0f9d262a1c | ||
|
|
747476a225 | ||
|
|
34765b556b | ||
|
|
dfb4860578 | ||
|
|
ce62713f02 | ||
|
|
8b5d04d60f | ||
|
|
1b74709b91 | ||
|
|
a5c0a282c5 | ||
|
|
4fc048ff20 | ||
|
|
375b5600cd | ||
|
|
32b997d817 | ||
|
|
ff3090e3cc | ||
|
|
6c6645f945 | ||
|
|
af6473d999 | ||
|
|
11851f9701 | ||
|
|
cc4654eabd | ||
|
|
0bb91f4a77 | ||
|
|
f9d57f54df | ||
|
|
3ef1afc0f1 | ||
|
|
dbb5abebb6 | ||
|
|
700f33bd39 | ||
|
|
d01bbbccde | ||
|
|
4fc506f267 | ||
|
|
dc456276e5 | ||
|
|
b2ea50cb10 | ||
|
|
5074cf92ab | ||
|
|
a92bc8d192 | ||
|
|
ee538cf045 | ||
|
|
2b05d63a0f | ||
|
|
104e8918ce | ||
|
|
d6ec4d4f4a | ||
|
|
f0e7326b7a | ||
|
|
c8106a0006 | ||
|
|
c9ab5bc0b6 | ||
|
|
5e0f15fd43 | ||
|
|
4c30f090c7 | ||
|
|
63f247cdda | ||
|
|
e109fa9529 | ||
|
|
76e4ec2168 | ||
|
|
982babdb74 | ||
|
|
7ae2ae33d9 | ||
|
|
cb0788ae07 | ||
|
|
cb3e5dc234 | ||
|
|
59d40a2821 | ||
|
|
98a678e73d | ||
|
|
70292aae3c | ||
|
|
73521f0069 | ||
|
|
4533179604 | ||
|
|
1a21cc1a17 | ||
|
|
d08042f8a7 | ||
|
|
77aadb5f22 | ||
|
|
4fd913f7eb | ||
|
|
4b72e54ca7 | ||
|
|
adef2cc132 | ||
|
|
533b9951b1 | ||
|
|
9103cbc9db | ||
|
|
083de2bfc1 | ||
|
|
8618a4d2ba | ||
|
|
08bc982748 | ||
|
|
e9c5df7993 | ||
|
|
8a28b3aa77 | ||
|
|
1a0b100ad9 | ||
|
|
ff93563f41 | ||
|
|
2f25258191 | ||
|
|
2859079c32 | ||
|
|
74b83d305f | ||
|
|
70f6e4b828 | ||
|
|
6df196034e | ||
|
|
a63762737c | ||
|
|
77394bd4b9 | ||
|
|
cb87201c8b | ||
|
|
1a9c38794f | ||
|
|
34233efb63 | ||
|
|
af0608ebd6 | ||
|
|
8c7e5c094e | ||
|
|
c064737137 | ||
|
|
1d188a7ad3 | ||
|
|
66a6b65716 | ||
|
|
326652a399 | ||
|
|
59316e8d5a | ||
|
|
76d7f20c87 | ||
|
|
380b2797a5 | ||
|
|
1dd58f9bec | ||
|
|
ddc76ad0dc | ||
|
|
ffacf1c002 | ||
|
|
5a49b93b77 | ||
|
|
918a6eaec9 | ||
|
|
1e6ce70e3e | ||
|
|
b418054ee4 | ||
|
|
58f30e9d8a | ||
|
|
c45172a4bf | ||
|
|
221ba20083 | ||
|
|
93c5fbbb8b | ||
|
|
22d529523a | ||
|
|
ed6f479940 | ||
|
|
f19f712433 | ||
|
|
24a92c2809 | ||
|
|
443cc24408 | ||
|
|
e8d5228250 | ||
|
|
5c33fb090c | ||
|
|
48dd9146e7 | ||
|
|
c1c42e818e | ||
|
|
519905ef9c | ||
|
|
f242377d2b | ||
|
|
da06306274 | ||
|
|
b93b803a2e | ||
|
|
cf43ec4aff | ||
|
|
9795d98e77 | ||
|
|
316b4c047f | ||
|
|
1d701c6980 | ||
|
|
0203adb9cb | ||
|
|
0d05c2ad6e | ||
|
|
b3f44c4abd | ||
|
|
62115f57b1 | ||
|
|
9023172139 | ||
|
|
59631afd9a | ||
|
|
c2584c6edd | ||
|
|
685663af3c | ||
|
|
72b4b41443 | ||
|
|
70aa768d48 | ||
|
|
6029677eec | ||
|
|
3c78f4121e | ||
|
|
89170dd78f | ||
|
|
6379a62d95 | ||
|
|
4c05c0cf96 | ||
|
|
ce832da16c | ||
|
|
14de657d36 | ||
|
|
9a36c090bf | ||
|
|
3aca010b42 | ||
|
|
62c11ce3f3 | ||
|
|
f358538f4f | ||
|
|
9068857ba1 | ||
|
|
d241157084 | ||
|
|
69f73b1d74 | ||
|
|
202794f620 | ||
|
|
38cbd54604 | ||
|
|
3877e0043c | ||
|
|
f95398420b | ||
|
|
53905c1362 | ||
|
|
113aac8815 | ||
|
|
d2071dde1f | ||
|
|
4502af5aed | ||
|
|
06af68aa07 | ||
|
|
6d378c6397 | ||
|
|
ec0c0cf779 | ||
|
|
851694e323 | ||
|
|
ea92c64fdc | ||
|
|
dc36f681be | ||
|
|
48f1987a8d | ||
|
|
b98e2cef81 | ||
|
|
9f79ce82af | ||
|
|
5f18a9b2ee | ||
|
|
7f8a1ac0be | ||
|
|
1a67163ee8 | ||
|
|
38141de68d | ||
|
|
7a98b80687 | ||
|
|
229a12c8e6 | ||
|
|
2fdfe79400 | ||
|
|
9184b12a26 | ||
|
|
742378d8e1 | ||
|
|
6dcd739a8b | ||
|
|
f97384da6c | ||
|
|
6ea76f2771 | ||
|
|
715b255371 | ||
|
|
db094d3923 | ||
|
|
c29bdcae23 | ||
|
|
75219181a3 | ||
|
|
a5b5cf7cd1 | ||
|
|
142ba8ea00 | ||
|
|
4bc823e07c | ||
|
|
db06ca7138 | ||
|
|
95595a768e | ||
|
|
36f649768e | ||
|
|
0c6fc243f2 | ||
|
|
dfc46d5627 | ||
|
|
11d55f2121 | ||
|
|
014da57cf6 | ||
|
|
70a0ff4a8f | ||
|
|
dd0d5e4b90 | ||
|
|
15b3bb1700 | ||
|
|
077ec2ab11 | ||
|
|
f25db0795e | ||
|
|
c50a337c29 | ||
|
|
efeae09ce1 | ||
|
|
ad55b48664 | ||
|
|
94eabd34e6 | ||
|
|
6935589f74 | ||
|
|
4beb452027 | ||
|
|
b722da303a | ||
|
|
ad39263b94 | ||
|
|
0ffb08b112 | ||
|
|
ff80b4d0ff | ||
|
|
7fb4404928 | ||
|
|
8405f0bf9c | ||
|
|
3a7f9b56fe | ||
|
|
61034e2e2e | ||
|
|
108d6d3344 | ||
|
|
35bd00f6a1 | ||
|
|
69059d67ef | ||
|
|
e13783103f | ||
|
|
f719665c4e | ||
|
|
638f284614 | ||
|
|
32ac98ed95 | ||
|
|
46aee695ca | ||
|
|
716c67f858 | ||
|
|
fec10bb2d6 | ||
|
|
3dac2cf73e | ||
|
|
03eca800e6 | ||
|
|
28fa2e960e | ||
|
|
a3b9220f84 | ||
|
|
c09d48edf2 | ||
|
|
ae4ab0ebbb | ||
|
|
900a9a6d59 | ||
|
|
fc560e6730 | ||
|
|
e2a06470b7 | ||
|
|
ada27323f2 | ||
|
|
607a1c2395 | ||
|
|
b56956ea0c | ||
|
|
3d21290f7f | ||
|
|
4edd4c06bc | ||
|
|
566baddc6b | ||
|
|
febe3186ce | ||
|
|
5dd42c1871 | ||
|
|
8670793e6e | ||
|
|
41a04aa3ab | ||
|
|
88f841bc05 | ||
|
|
d19892d2ea | ||
|
|
c0905d6650 | ||
|
|
576d7d94b1 | ||
|
|
f4f1334b62 | ||
|
|
aaff6c3685 | ||
|
|
42d2af4c84 | ||
|
|
6be91c824c | ||
|
|
6ee0537db8 | ||
|
|
3fbeff4308 | ||
|
|
375546b61a | ||
|
|
25a1d50763 | ||
|
|
6f0d26c22c | ||
|
|
4fe073cc1a | ||
|
|
5cd3d36d20 | ||
|
|
9f4dcd04e9 | ||
|
|
d7ad76ea1e | ||
|
|
e82bb93221 | ||
|
|
000cb93aad | ||
|
|
ad4f5514b9 | ||
|
|
8d29a29867 | ||
|
|
d7de819d11 | ||
|
|
7a6cf30cb2 | ||
|
|
e43d67591c | ||
|
|
134237d1eb | ||
|
|
26d9070aa7 | ||
|
|
f9ffb8ada5 | ||
|
|
a47888f02c | ||
|
|
5bef2f4d86 | ||
|
|
d52c7dcc94 |
1
.github/dependabot.yml
vendored
1
.github/dependabot.yml
vendored
@@ -7,6 +7,5 @@ updates:
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
labels:
|
||||
- 'skip changelog'
|
||||
- 'dependencies'
|
||||
rebase-strategy: disabled
|
||||
|
||||
33
.github/release-draft-template.yml
vendored
33
.github/release-draft-template.yml
vendored
@@ -1,33 +0,0 @@
|
||||
name-template: 'v$RESOLVED_VERSION'
|
||||
tag-template: 'v$RESOLVED_VERSION'
|
||||
exclude-labels:
|
||||
- 'skip changelog'
|
||||
version-resolver:
|
||||
minor:
|
||||
labels:
|
||||
- 'enhancement'
|
||||
default: patch
|
||||
categories:
|
||||
- title: '⚠️ Breaking changes'
|
||||
label: 'breaking-change'
|
||||
- title: '🚀 Enhancements'
|
||||
label: 'enhancement'
|
||||
- title: '🐛 Bug Fixes'
|
||||
label: 'bug'
|
||||
- title: '🔒 Security'
|
||||
label: 'security'
|
||||
- title: '⚙️ Maintenance/misc'
|
||||
label:
|
||||
- 'maintenance'
|
||||
- 'documentation'
|
||||
template: |
|
||||
$CHANGES
|
||||
|
||||
❤️ Huge thanks to our contributors: $CONTRIBUTORS.
|
||||
no-changes-template: 'Changes are coming soon 😎'
|
||||
sort-direction: 'ascending'
|
||||
replacers:
|
||||
- search: '/(?:and )?@dependabot-preview(?:\[bot\])?,?/g'
|
||||
replace: ''
|
||||
- search: '/(?:and )?@dependabot(?:\[bot\])?,?/g'
|
||||
replace: ''
|
||||
2
.github/workflows/bench-manual.yml
vendored
2
.github/workflows/bench-manual.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/bench-pr.yml
vendored
2
.github/workflows/bench-pr.yml
vendored
@@ -66,7 +66,7 @@ jobs:
|
||||
fetch-depth: 0 # fetch full history to be able to get main commit sha
|
||||
ref: ${{ steps.comment-branch.outputs.head_ref }}
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/bench-push-indexing.yml
vendored
2
.github/workflows/bench-push-indexing.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/benchmarks-manual.yml
vendored
2
.github/workflows/benchmarks-manual.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/benchmarks-pr.yml
vendored
2
.github/workflows/benchmarks-pr.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/flaky-tests.yml
vendored
2
.github/workflows/flaky-tests.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Install cargo-flaky
|
||||
run: cargo install cargo-flaky
|
||||
- name: Run cargo flaky in the dumps
|
||||
|
||||
2
.github/workflows/fuzzer-indexing.yml
vendored
2
.github/workflows/fuzzer-indexing.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Install cargo-deb
|
||||
run: cargo install cargo-deb
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
2
.github/workflows/publish-docker-images.yml
vendored
2
.github/workflows/publish-docker-images.yml
vendored
@@ -65,7 +65,7 @@ jobs:
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Install cosign
|
||||
uses: sigstore/cosign-installer@d58896d6a1865668819e1d91763c7751a165e159 # tag=v3.9.2
|
||||
uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
|
||||
21
.github/workflows/publish-release-assets.yml
vendored
21
.github/workflows/publish-release-assets.yml
vendored
@@ -11,7 +11,7 @@ jobs:
|
||||
check-version:
|
||||
name: Check the version validity
|
||||
runs-on: ubuntu-latest
|
||||
# No need to check the version for dry run (cron)
|
||||
# No need to check the version for dry run (cron or workflow_dispatch)
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
# Check if the tag has the v<nmumber>.<number>.<number> format.
|
||||
@@ -45,10 +45,10 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
@@ -75,10 +75,10 @@ jobs:
|
||||
asset_name: meilisearch-windows-amd64.exe
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
@@ -101,7 +101,7 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.85
|
||||
uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
@@ -111,7 +111,7 @@ jobs:
|
||||
command: build
|
||||
args: --release --target ${{ matrix.target }}
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
@@ -148,7 +148,7 @@ jobs:
|
||||
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
|
||||
apt-get update -y && apt-get install -y docker-ce
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.85
|
||||
uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
@@ -176,7 +176,7 @@ jobs:
|
||||
- name: List target output files
|
||||
run: ls -lR ./target
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
@@ -187,6 +187,7 @@ jobs:
|
||||
|
||||
publish-openapi-file:
|
||||
name: Publish OpenAPI file
|
||||
needs: check-version
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -201,7 +202,7 @@ jobs:
|
||||
cd crates/openapi-generator
|
||||
cargo run --release -- --pretty --output ../../meilisearch.json
|
||||
- name: Upload OpenAPI to Release
|
||||
# No need to upload for dry run (cron)
|
||||
# No need to upload for dry run (cron or workflow_dispatch)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
|
||||
20
.github/workflows/release-drafter.yml
vendored
20
.github/workflows/release-drafter.yml
vendored
@@ -1,20 +0,0 @@
|
||||
name: Release Drafter
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
update_release_draft:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: release-drafter/release-drafter@v6
|
||||
with:
|
||||
config-name: release-draft-template.yml
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.RELEASE_DRAFTER_TOKEN }}
|
||||
16
.github/workflows/sdks-tests.yml
vendored
16
.github/workflows/sdks-tests.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
||||
with:
|
||||
repository: meilisearch/meilisearch-dotnet
|
||||
- name: Setup .NET Core
|
||||
uses: actions/setup-dotnet@v4
|
||||
uses: actions/setup-dotnet@v5
|
||||
with:
|
||||
dotnet-version: "8.0.x"
|
||||
- name: Install dependencies
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
uses: actions/setup-go@v6
|
||||
with:
|
||||
go-version: stable
|
||||
- uses: actions/checkout@v5
|
||||
@@ -135,13 +135,13 @@ jobs:
|
||||
- name: Set up Java
|
||||
uses: actions/setup-java@v5
|
||||
with:
|
||||
java-version: 8
|
||||
distribution: 'zulu'
|
||||
java-version: 17
|
||||
distribution: 'temurin'
|
||||
cache: gradle
|
||||
- name: Grant execute permission for gradlew
|
||||
run: chmod +x gradlew
|
||||
- name: Build and run unit and integration tests
|
||||
run: ./gradlew build integrationTest
|
||||
run: ./gradlew build integrationTest --info
|
||||
|
||||
meilisearch-js-tests:
|
||||
needs: define-docker-image
|
||||
@@ -160,7 +160,7 @@ jobs:
|
||||
with:
|
||||
repository: meilisearch/meilisearch-js
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
cache: 'yarn'
|
||||
- name: Install dependencies
|
||||
@@ -224,7 +224,7 @@ jobs:
|
||||
with:
|
||||
repository: meilisearch/meilisearch-python
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
- name: Install pipenv
|
||||
uses: dschep/install-pipenv-action@v1
|
||||
- name: Install dependencies
|
||||
@@ -318,7 +318,7 @@ jobs:
|
||||
with:
|
||||
repository: meilisearch/meilisearch-js-plugins
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
cache: yarn
|
||||
- name: Install dependencies
|
||||
|
||||
14
.github/workflows/test-suite.yml
vendored
14
.github/workflows/test-suite.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Setup test with Rust stable
|
||||
uses: dtolnay/rust-toolchain@1.85
|
||||
uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run cargo check without any default features
|
||||
@@ -52,7 +52,7 @@ jobs:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Run cargo build with almost all features
|
||||
run: |
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
@@ -129,7 +129,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Run cargo tree without default features and check lindera is not present
|
||||
run: |
|
||||
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
|
||||
@@ -153,7 +153,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run tests in debug
|
||||
@@ -167,7 +167,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
components: clippy
|
||||
@@ -184,7 +184,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: nightly-2024-07-09
|
||||
|
||||
@@ -18,7 +18,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
- name: Install sd
|
||||
|
||||
1445
Cargo.lock
generated
1445
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -23,7 +23,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.21.0"
|
||||
version = "1.26.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Compile
|
||||
FROM rust:1.85-alpine3.20 AS compiler
|
||||
FROM rust:1.89-alpine3.22 AS compiler
|
||||
|
||||
RUN apk add -q --no-cache build-base openssl-dev
|
||||
|
||||
@@ -20,7 +20,7 @@ RUN set -eux; \
|
||||
cargo build --release -p meilisearch -p meilitool
|
||||
|
||||
# Run
|
||||
FROM alpine:3.20
|
||||
FROM alpine:3.22
|
||||
LABEL org.opencontainers.image.source="https://github.com/meilisearch/meilisearch"
|
||||
|
||||
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
|
||||
|
||||
28
LICENSE
28
LICENSE
@@ -1,29 +1,9 @@
|
||||
MIT License
|
||||
# License
|
||||
|
||||
Copyright (c) 2019-2025 Meili SAS
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Part of this work fall under the Meilisearch Enterprise Edition (EE) and are licensed under the Business Source License 1.1, please refer to [LICENSE-EE](./LICENSE-EE) for details.
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
The other parts of this work are licensed under the [MIT license](./LICENSE-MIT).
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
---
|
||||
|
||||
🔒 Meilisearch Enterprise Edition (EE)
|
||||
|
||||
Certain parts of this codebase are not licensed under the MIT license and governed by the Business Source License 1.1.
|
||||
|
||||
See the LICENSE-EE file for details.
|
||||
`SPDX-License-Identifier: MIT AND BUSL-1.1`
|
||||
21
LICENSE-MIT
Normal file
21
LICENSE-MIT
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019-2025 Meili SAS
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -39,6 +39,7 @@
|
||||
## 🖥 Examples
|
||||
|
||||
- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
|
||||
- [**Flickr**](https://flickr.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — Search and explore one hundred million Flickr images with semantic search.
|
||||
- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
|
||||
- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million of songs.
|
||||
- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
|
||||
@@ -121,7 +122,7 @@ If you want to know more about the kind of data we collect and what we use it fo
|
||||
|
||||
Meilisearch is a search engine created by [Meili](https://www.meilisearch.com/careers), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
|
||||
|
||||
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
|
||||
🗞 [Subscribe to our newsletter](https://share-eu1.hsforms.com/1LN5N0x_GQgq7ss7tXmSykwfg3aq) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
|
||||
|
||||
💌 Want to make a suggestion or give feedback? Here are some of the channels where you can reach us:
|
||||
|
||||
|
||||
@@ -96,6 +96,8 @@ pub struct TaskDump {
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
// A `Kind` specific version made for the dump. If modified you may break the dump.
|
||||
@@ -158,6 +160,9 @@ pub enum KindDump {
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl From<Task> for TaskDump {
|
||||
@@ -175,6 +180,7 @@ impl From<Task> for TaskDump {
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
network: task.network,
|
||||
custom_metadata: task.custom_metadata,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -240,6 +246,9 @@ impl From<KindWithContent> for KindDump {
|
||||
KindWithContent::UpgradeDatabase { from: version } => {
|
||||
KindDump::UpgradeDatabase { from: version }
|
||||
}
|
||||
KindWithContent::IndexCompaction { index_uid } => {
|
||||
KindDump::IndexCompaction { index_uid }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -390,6 +399,7 @@ pub(crate) mod test {
|
||||
started_at: Some(datetime!(2022-11-20 0:00 UTC)),
|
||||
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
},
|
||||
None,
|
||||
),
|
||||
@@ -415,6 +425,7 @@ pub(crate) mod test {
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
},
|
||||
Some(vec![
|
||||
json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
|
||||
@@ -435,6 +446,7 @@ pub(crate) mod test {
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
},
|
||||
None,
|
||||
),
|
||||
|
||||
@@ -97,6 +97,7 @@ impl CompatV2ToV3 {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum CompatIndexV2ToV3 {
|
||||
V2(v2::V2IndexReader),
|
||||
Compat(Box<CompatIndexV1ToV2>),
|
||||
|
||||
@@ -164,6 +164,7 @@ impl CompatV5ToV6 {
|
||||
started_at: task_view.started_at,
|
||||
finished_at: task_view.finished_at,
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
};
|
||||
|
||||
(task, content_file)
|
||||
|
||||
@@ -60,7 +60,7 @@ impl FileStore {
|
||||
|
||||
/// Returns the file corresponding to the requested uuid.
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> {
|
||||
let path = self.get_update_path(uuid);
|
||||
let path = self.update_path(uuid);
|
||||
let file = match StdFile::open(path) {
|
||||
Ok(file) => file,
|
||||
Err(e) => {
|
||||
@@ -72,7 +72,7 @@ impl FileStore {
|
||||
}
|
||||
|
||||
/// Returns the path that correspond to this uuid, the path could not exists.
|
||||
pub fn get_update_path(&self, uuid: Uuid) -> PathBuf {
|
||||
pub fn update_path(&self, uuid: Uuid) -> PathBuf {
|
||||
self.path.join(uuid.to_string())
|
||||
}
|
||||
|
||||
|
||||
@@ -7,23 +7,14 @@
|
||||
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::char;
|
||||
use nom::character::complete::multispace0;
|
||||
use nom::character::complete::multispace1;
|
||||
use nom::combinator::cut;
|
||||
use nom::combinator::map;
|
||||
use nom::combinator::value;
|
||||
use nom::sequence::preceded;
|
||||
use nom::sequence::{terminated, tuple};
|
||||
use nom::character::complete::{char, multispace0, multispace1};
|
||||
use nom::combinator::{cut, map, value};
|
||||
use nom::sequence::{preceded, terminated, tuple};
|
||||
use Condition::*;
|
||||
|
||||
use crate::error::IResultExt;
|
||||
use crate::value::parse_vector_value;
|
||||
use crate::value::parse_vector_value_cut;
|
||||
use crate::Error;
|
||||
use crate::ErrorKind;
|
||||
use crate::VectorFilter;
|
||||
use crate::{parse_value, FilterCondition, IResult, Span, Token};
|
||||
use crate::value::{parse_vector_value, parse_vector_value_cut};
|
||||
use crate::{parse_value, Error, ErrorKind, FilterCondition, IResult, Span, Token, VectorFilter};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Condition<'a> {
|
||||
@@ -124,7 +115,7 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
|
||||
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
|
||||
}
|
||||
|
||||
fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>)> {
|
||||
fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter)> {
|
||||
let (input, _) = multispace0(input)?;
|
||||
let (input, fid) = tag("_vectors")(input)?;
|
||||
|
||||
|
||||
@@ -75,7 +75,11 @@ pub enum ExpectedValueKind {
|
||||
pub enum ErrorKind<'a> {
|
||||
ReservedGeo(&'a str),
|
||||
GeoRadius,
|
||||
GeoRadiusArgumentCount(usize),
|
||||
GeoBoundingBox,
|
||||
GeoPolygon,
|
||||
GeoPolygonNotEnoughPoints(usize),
|
||||
GeoCoordinatesNotPair(usize),
|
||||
MisusedGeoRadius,
|
||||
MisusedGeoBoundingBox,
|
||||
VectorFilterLeftover,
|
||||
@@ -189,7 +193,7 @@ impl Display for Error<'_> {
|
||||
}
|
||||
ErrorKind::InvalidPrimary => {
|
||||
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` {text}")?
|
||||
}
|
||||
ErrorKind::InvalidEscapedNumber => {
|
||||
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
|
||||
@@ -198,11 +202,23 @@ impl Display for Error<'_> {
|
||||
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
|
||||
}
|
||||
ErrorKind::GeoRadius => {
|
||||
writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
|
||||
writeln!(f, "The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.")?
|
||||
}
|
||||
ErrorKind::GeoRadiusArgumentCount(count) => {
|
||||
writeln!(f, "Was expecting 3 or 4 arguments for `_geoRadius`, but instead found {count}.")?
|
||||
}
|
||||
ErrorKind::GeoBoundingBox => {
|
||||
writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
|
||||
}
|
||||
ErrorKind::GeoPolygon => {
|
||||
writeln!(f, "The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.")?
|
||||
}
|
||||
ErrorKind::GeoPolygonNotEnoughPoints(n) => {
|
||||
writeln!(f, "The `_geoPolygon` filter expects at least 3 points but only {n} were specified")?;
|
||||
}
|
||||
ErrorKind::GeoCoordinatesNotPair(number) => {
|
||||
writeln!(f, "Was expecting 2 coordinates but instead found {number}.")?
|
||||
}
|
||||
ErrorKind::ReservedGeo(name) => {
|
||||
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
//! word = (alphanumeric | _ | - | .)+
|
||||
//! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
|
||||
//! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]")
|
||||
//! geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
|
||||
//! ```
|
||||
//!
|
||||
//! Other BNF grammar used to handle some specific errors:
|
||||
@@ -116,7 +117,7 @@ impl<'a> Token<'a> {
|
||||
self.span
|
||||
}
|
||||
|
||||
pub fn parse_finite_float(&self) -> Result<f64, Error> {
|
||||
pub fn parse_finite_float(&self) -> Result<f64, Error<'a>> {
|
||||
let value: f64 = self.value().parse().map_err(|e| self.as_external_error(e))?;
|
||||
if value.is_finite() {
|
||||
Ok(value)
|
||||
@@ -156,8 +157,9 @@ pub enum FilterCondition<'a> {
|
||||
Or(Vec<Self>),
|
||||
And(Vec<Self>),
|
||||
VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
|
||||
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
|
||||
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a>, resolution: Option<Token<'a>> },
|
||||
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
|
||||
GeoPolygon { points: Vec<[Token<'a>; 2]> },
|
||||
}
|
||||
|
||||
pub enum TraversedElement<'a> {
|
||||
@@ -166,7 +168,7 @@ pub enum TraversedElement<'a> {
|
||||
}
|
||||
|
||||
impl<'a> FilterCondition<'a> {
|
||||
pub fn use_contains_operator(&self) -> Option<&Token> {
|
||||
pub fn use_contains_operator(&self) -> Option<&Token<'a>> {
|
||||
match self {
|
||||
FilterCondition::Condition { fid: _, op } => match op {
|
||||
Condition::GreaterThan(_)
|
||||
@@ -189,11 +191,12 @@ impl<'a> FilterCondition<'a> {
|
||||
FilterCondition::VectorExists { .. }
|
||||
| FilterCondition::GeoLowerThan { .. }
|
||||
| FilterCondition::GeoBoundingBox { .. }
|
||||
| FilterCondition::GeoPolygon { .. }
|
||||
| FilterCondition::In { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn use_vector_filter(&self) -> Option<&Token> {
|
||||
pub fn use_vector_filter(&self) -> Option<&Token<'a>> {
|
||||
match self {
|
||||
FilterCondition::Condition { .. } => None,
|
||||
FilterCondition::Not(this) => this.use_vector_filter(),
|
||||
@@ -202,12 +205,13 @@ impl<'a> FilterCondition<'a> {
|
||||
}
|
||||
FilterCondition::GeoLowerThan { .. }
|
||||
| FilterCondition::GeoBoundingBox { .. }
|
||||
| FilterCondition::GeoPolygon { .. }
|
||||
| FilterCondition::In { .. } => None,
|
||||
FilterCondition::VectorExists { fid, .. } => Some(fid),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token> + '_> {
|
||||
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token<'a>> + '_> {
|
||||
if depth == 0 {
|
||||
return Box::new(std::iter::empty());
|
||||
}
|
||||
@@ -228,7 +232,7 @@ impl<'a> FilterCondition<'a> {
|
||||
}
|
||||
|
||||
/// Returns the first token found at the specified depth, `None` if no token at this depth.
|
||||
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
|
||||
pub fn token_at_depth(&self, depth: usize) -> Option<&Token<'a>> {
|
||||
match self {
|
||||
FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid),
|
||||
FilterCondition::Or(subfilters) => {
|
||||
@@ -396,23 +400,27 @@ fn parse_not(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
|
||||
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to allow space BEFORE the _geoRadius but not after
|
||||
let parsed = preceded(
|
||||
tuple((multispace0, word_exact("_geoRadius"))),
|
||||
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
|
||||
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
||||
)(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoRadius)));
|
||||
|
||||
let (input, _) = tuple((multispace0, word_exact("_geoRadius")))(input)?;
|
||||
|
||||
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
|
||||
|
||||
let parsed =
|
||||
delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))(input)
|
||||
.map_cut(ErrorKind::GeoRadius);
|
||||
|
||||
let (input, args) = parsed?;
|
||||
|
||||
if args.len() != 3 {
|
||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoRadius)));
|
||||
if !(3..=4).contains(&args.len()) {
|
||||
return Err(Error::failure_from_kind(input, ErrorKind::GeoRadiusArgumentCount(args.len())));
|
||||
}
|
||||
|
||||
let res = FilterCondition::GeoLowerThan {
|
||||
point: [args[0].into(), args[1].into()],
|
||||
radius: args[2].into(),
|
||||
resolution: args.get(3).cloned().map(Token::from),
|
||||
};
|
||||
|
||||
Ok((input, res))
|
||||
}
|
||||
|
||||
@@ -420,26 +428,33 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
||||
/// If we parse `_geoBoundingBox` we MUST parse the rest of the expression.
|
||||
fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to allow space BEFORE the _geoBoundingBox but not after
|
||||
let parsed = preceded(
|
||||
tuple((multispace0, word_exact("_geoBoundingBox"))),
|
||||
// if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
|
||||
cut(delimited(
|
||||
char('('),
|
||||
separated_list1(
|
||||
tag(","),
|
||||
ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
|
||||
),
|
||||
char(')'),
|
||||
)),
|
||||
|
||||
let (input, _) = tuple((multispace0, word_exact("_geoBoundingBox")))(input)?;
|
||||
|
||||
// if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
|
||||
|
||||
let (input, args) = delimited(
|
||||
char('('),
|
||||
separated_list1(
|
||||
tag(","),
|
||||
ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
|
||||
),
|
||||
char(')'),
|
||||
)(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
|
||||
.map_cut(ErrorKind::GeoBoundingBox)?;
|
||||
|
||||
let (input, args) = parsed?;
|
||||
|
||||
if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
|
||||
if args.len() != 2 {
|
||||
return Err(Error::failure_from_kind(input, ErrorKind::GeoBoundingBox));
|
||||
}
|
||||
|
||||
if let Some(offending) = args.iter().find(|a| a.len() != 2) {
|
||||
let context = offending.first().unwrap_or(&input);
|
||||
return Err(Error::failure_from_kind(
|
||||
*context,
|
||||
ErrorKind::GeoCoordinatesNotPair(offending.len()),
|
||||
));
|
||||
}
|
||||
|
||||
let res = FilterCondition::GeoBoundingBox {
|
||||
top_right_point: [args[0][0].into(), args[0][1].into()],
|
||||
bottom_left_point: [args[1][0].into(), args[1][1].into()],
|
||||
@@ -447,6 +462,47 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
|
||||
Ok((input, res))
|
||||
}
|
||||
|
||||
/// geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
|
||||
/// If we parse `_geoPolygon` we MUST parse the rest of the expression.
|
||||
fn parse_geo_polygon(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to allow space BEFORE the _geoPolygon but not after
|
||||
|
||||
let (input, _) = tuple((multispace0, word_exact("_geoPolygon")))(input)?;
|
||||
|
||||
// if we were able to parse `_geoPolygon` and can't parse the rest of the input we return a failure
|
||||
|
||||
let (input, args): (_, Vec<Vec<LocatedSpan<_, _>>>) = delimited(
|
||||
char('('),
|
||||
separated_list1(
|
||||
tag(","),
|
||||
ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
|
||||
),
|
||||
preceded(opt(ws(char(','))), char(')')), // Tolerate trailing comma
|
||||
)(input)
|
||||
.map_cut(ErrorKind::GeoPolygon)?;
|
||||
|
||||
if args.len() < 3 {
|
||||
let context = args.last().and_then(|a| a.last()).unwrap_or(&input);
|
||||
return Err(Error::failure_from_kind(
|
||||
*context,
|
||||
ErrorKind::GeoPolygonNotEnoughPoints(args.len()),
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(offending) = args.iter().find(|a| a.len() != 2) {
|
||||
let context = offending.first().unwrap_or(&input);
|
||||
return Err(Error::failure_from_kind(
|
||||
*context,
|
||||
ErrorKind::GeoCoordinatesNotPair(offending.len()),
|
||||
));
|
||||
}
|
||||
|
||||
let res = FilterCondition::GeoPolygon {
|
||||
points: args.into_iter().map(|a| [a[0].into(), a[1].into()]).collect(),
|
||||
};
|
||||
Ok((input, res))
|
||||
}
|
||||
|
||||
/// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float)
|
||||
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to forbid space BEFORE the _geoPoint but not after
|
||||
@@ -516,8 +572,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
|
||||
}),
|
||||
),
|
||||
parse_geo_radius,
|
||||
parse_geo_bounding_box,
|
||||
// Made a random block of functions because we reached the maximum number of elements per alt
|
||||
alt((parse_geo_radius, parse_geo_bounding_box, parse_geo_polygon)),
|
||||
parse_in,
|
||||
parse_not_in,
|
||||
parse_condition,
|
||||
@@ -597,9 +653,12 @@ impl std::fmt::Display for FilterCondition<'_> {
|
||||
}
|
||||
write!(f, " EXISTS")
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point, radius } => {
|
||||
FilterCondition::GeoLowerThan { point, radius, resolution: None } => {
|
||||
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point, radius, resolution: Some(resolution) } => {
|
||||
write!(f, "_geoRadius({}, {}, {}, {})", point[0], point[1], radius, resolution)
|
||||
}
|
||||
FilterCondition::GeoBoundingBox {
|
||||
top_right_point: top_left_point,
|
||||
bottom_left_point: bottom_right_point,
|
||||
@@ -613,6 +672,13 @@ impl std::fmt::Display for FilterCondition<'_> {
|
||||
bottom_right_point[1]
|
||||
)
|
||||
}
|
||||
FilterCondition::GeoPolygon { points } => {
|
||||
write!(f, "_geoPolygon([")?;
|
||||
for point in points {
|
||||
write!(f, "[{}, {}], ", point[0], point[1])?;
|
||||
}
|
||||
write!(f, "])")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -651,7 +717,7 @@ pub mod tests {
|
||||
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
|
||||
pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
|
||||
// if the string is empty we still need to return 1 for the line number
|
||||
let lines = before.is_empty().then_some(1).unwrap_or_else(|| before.lines().count());
|
||||
let lines = if before.is_empty() { 1 } else { before.lines().count() };
|
||||
let offset = before.chars().count();
|
||||
// the extra field is not checked in the tests so we can set it to nothing
|
||||
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
|
||||
@@ -776,12 +842,17 @@ pub mod tests {
|
||||
insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
|
||||
insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
|
||||
insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
|
||||
insta::assert_snapshot!(p("_geoRadius(12,13,14,1000)"), @"_geoRadius({12}, {13}, {14}, {1000})");
|
||||
|
||||
// Test geo bounding box
|
||||
insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
|
||||
insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
|
||||
insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
|
||||
|
||||
// Test geo polygon
|
||||
insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [16, 17])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{16}, {17}], ])");
|
||||
insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [-1.2,2939.2], [1,1])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{-1.2}, {2939.2}], [{1}, {1}], ])");
|
||||
|
||||
// Test OR + AND
|
||||
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
|
||||
insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
|
||||
@@ -838,50 +909,80 @@ pub mod tests {
|
||||
11:12 channel = 🐻 AND followers < 100
|
||||
"###);
|
||||
|
||||
insta::assert_snapshot!(p("'OR'"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
|
||||
insta::assert_snapshot!(p("'OR'"), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `\'OR\'`.
|
||||
1:5 'OR'
|
||||
"###);
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("OR"), @r###"
|
||||
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
|
||||
1:3 OR
|
||||
"###);
|
||||
|
||||
insta::assert_snapshot!(p("channel Ponce"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
|
||||
insta::assert_snapshot!(p("channel Ponce"), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `channel Ponce`.
|
||||
1:14 channel Ponce
|
||||
"###);
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
|
||||
insta::assert_snapshot!(p("channel = Ponce OR"), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` but instead got nothing.
|
||||
19:19 channel = Ponce OR
|
||||
"###);
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoRadius"), @r###"
|
||||
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
|
||||
1:11 _geoRadius
|
||||
"###);
|
||||
insta::assert_snapshot!(p("_geoRadius"), @r"
|
||||
The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
|
||||
11:11 _geoRadius
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoRadius = 12"), @r###"
|
||||
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
|
||||
1:16 _geoRadius = 12
|
||||
"###);
|
||||
insta::assert_snapshot!(p("_geoRadius = 12"), @r"
|
||||
The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
|
||||
11:16 _geoRadius = 12
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoBoundingBox"), @r###"
|
||||
insta::assert_snapshot!(p("_geoBoundingBox"), @r"
|
||||
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
||||
1:16 _geoBoundingBox
|
||||
"###);
|
||||
16:16 _geoBoundingBox
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###"
|
||||
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r"
|
||||
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
||||
1:21 _geoBoundingBox = 12
|
||||
"###);
|
||||
16:21 _geoBoundingBox = 12
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
|
||||
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r"
|
||||
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
||||
1:26 _geoBoundingBox(1.0, 1.0)
|
||||
"###);
|
||||
17:26 _geoBoundingBox(1.0, 1.0)
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoPolygon([1,2,3])"), @r"
|
||||
The `_geoPolygon` filter expects at least 3 points but only 1 were specified
|
||||
18:19 _geoPolygon([1,2,3])
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoPolygon(1,2,3)"), @r"
|
||||
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
|
||||
13:19 _geoPolygon(1,2,3)
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2],[1,2,3])"), @r"
|
||||
Was expecting 2 coordinates but instead found 3.
|
||||
26:27 _geoPolygon([1,2],[1,2],[1,2,3])
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2,3])"), @r"
|
||||
The `_geoPolygon` filter expects at least 3 points but only 2 were specified
|
||||
24:25 _geoPolygon([1,2],[1,2,3])
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoPolygon(1)"), @r"
|
||||
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
|
||||
13:15 _geoPolygon(1)
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoPolygon([1,2)"), @r"
|
||||
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
|
||||
17:18 _geoPolygon([1,2)
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
|
||||
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
@@ -938,15 +1039,15 @@ pub mod tests {
|
||||
34:35 channel = mv OR followers >= 1000)
|
||||
"###);
|
||||
|
||||
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
|
||||
insta::assert_snapshot!(p("colour NOT EXIST"), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `colour NOT EXIST`.
|
||||
1:17 colour NOT EXIST
|
||||
"###);
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
|
||||
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `subscribers 100 TO1000`.
|
||||
1:23 subscribers 100 TO1000
|
||||
"###);
|
||||
");
|
||||
|
||||
insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
|
||||
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
|
||||
@@ -1071,38 +1172,38 @@ pub mod tests {
|
||||
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
|
||||
"###);
|
||||
|
||||
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
|
||||
insta::assert_snapshot!(p(r#"value NULL"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NULL`.
|
||||
1:11 value NULL
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT NULL`.
|
||||
1:15 value NOT NULL
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value EMPTY"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value EMPTY`.
|
||||
1:12 value EMPTY
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT EMPTY`.
|
||||
1:16 value NOT EMPTY
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value IS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value IS"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS`.
|
||||
1:9 value IS
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value IS NOT"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT`.
|
||||
1:13 value IS NOT
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS EXISTS`.
|
||||
1:16 value IS EXISTS
|
||||
"###);
|
||||
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
|
||||
");
|
||||
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT EXISTS`.
|
||||
1:20 value IS NOT EXISTS
|
||||
"###);
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -14,6 +14,7 @@ license.workspace = true
|
||||
anyhow = "1.0.98"
|
||||
bincode = "1.3.3"
|
||||
byte-unit = "5.1.6"
|
||||
bytes = "1.10.1"
|
||||
bumpalo = "3.18.1"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.8.0"
|
||||
@@ -32,6 +33,7 @@ rayon = "1.10.0"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
@@ -45,6 +47,9 @@ tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
backoff = "0.4.0"
|
||||
reqwest = { version = "0.12.23", features = ["rustls-tls", "http2"], default-features = false }
|
||||
rusty-s3 = "0.8.1"
|
||||
tokio = { version = "1.47.1", features = ["full"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#![allow(clippy::result_large_err)]
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
|
||||
@@ -148,6 +150,7 @@ impl<'a> Dump<'a> {
|
||||
details: task.details,
|
||||
status: task.status,
|
||||
network: task.network,
|
||||
custom_metadata: task.custom_metadata,
|
||||
kind: match task.kind {
|
||||
KindDump::DocumentImport {
|
||||
primary_key,
|
||||
@@ -232,6 +235,9 @@ impl<'a> Dump<'a> {
|
||||
}
|
||||
}
|
||||
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
|
||||
KindDump::IndexCompaction { index_uid } => {
|
||||
KindWithContent::IndexCompaction { index_uid }
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::milli::index::RollbackOutcome;
|
||||
use meilisearch_types::tasks::{Kind, Status};
|
||||
use meilisearch_types::{heed, milli};
|
||||
use reqwest::StatusCode;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::TaskId;
|
||||
@@ -127,6 +128,14 @@ pub enum Error {
|
||||
#[error("Aborted task")]
|
||||
AbortedTask,
|
||||
|
||||
#[error("S3 error: status: {status}, body: {body}")]
|
||||
S3Error { status: StatusCode, body: String },
|
||||
#[error("S3 HTTP error: {0}")]
|
||||
S3HttpError(reqwest::Error),
|
||||
#[error("S3 XML error: {0}")]
|
||||
S3XmlError(Box<dyn std::error::Error + Send + Sync>),
|
||||
#[error("S3 bucket error: {0}")]
|
||||
S3BucketError(rusty_s3::BucketError),
|
||||
#[error(transparent)]
|
||||
Dump(#[from] dump::Error),
|
||||
#[error(transparent)]
|
||||
@@ -226,6 +235,10 @@ impl Error {
|
||||
| Error::TaskCancelationWithEmptyQuery
|
||||
| Error::FromRemoteWhenExporting { .. }
|
||||
| Error::AbortedTask
|
||||
| Error::S3Error { .. }
|
||||
| Error::S3HttpError(_)
|
||||
| Error::S3XmlError(_)
|
||||
| Error::S3BucketError(_)
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
| Error::Milli { .. }
|
||||
@@ -293,8 +306,14 @@ impl ErrorCode for Error {
|
||||
Error::BatchNotFound(_) => Code::BatchNotFound,
|
||||
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
|
||||
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
|
||||
// TODO: not sure of the Code to use
|
||||
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
|
||||
Error::S3Error { status, .. } if status.is_client_error() => {
|
||||
Code::InvalidS3SnapshotRequest
|
||||
}
|
||||
Error::S3Error { .. } => Code::S3SnapshotServerError,
|
||||
Error::S3HttpError(_) => Code::S3SnapshotServerError,
|
||||
Error::S3XmlError(_) => Code::S3SnapshotServerError,
|
||||
Error::S3BucketError(_) => Code::InvalidS3SnapshotParameters,
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli { error, .. } => error.error_code(),
|
||||
Error::ProcessBatchPanicked(_) => Code::Internal,
|
||||
|
||||
@@ -199,7 +199,7 @@ impl IndexMapper {
|
||||
let uuid = Uuid::new_v4();
|
||||
self.index_mapping.put(&mut wtxn, name, &uuid)?;
|
||||
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
fs::create_dir_all(&index_path)?;
|
||||
|
||||
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
|
||||
@@ -286,7 +286,7 @@ impl IndexMapper {
|
||||
};
|
||||
|
||||
let index_map = self.index_map.clone();
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
let index_name = name.to_string();
|
||||
thread::Builder::new()
|
||||
.name(String::from("index_deleter"))
|
||||
@@ -341,6 +341,26 @@ impl IndexMapper {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Closes the specified index.
|
||||
///
|
||||
/// This operation involves closing the underlying environment and so can take a long time to complete.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - If the Index corresponding to the passed name is concurrently being deleted/resized or cannot be found in the
|
||||
/// in memory hash map.
|
||||
pub fn close_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
|
||||
let uuid = self
|
||||
.index_mapping
|
||||
.get(rtxn, name)?
|
||||
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
|
||||
|
||||
// We remove the index from the in-memory index map.
|
||||
self.index_map.write().unwrap().close_for_resize(&uuid, self.enable_mdb_writemap, 0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return an index, may open it if it wasn't already opened.
|
||||
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
|
||||
if let Some((current_name, current_index)) =
|
||||
@@ -388,7 +408,7 @@ impl IndexMapper {
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
// take the lock to reopen the environment.
|
||||
reopen
|
||||
.reopen(&mut self.index_map.write().unwrap(), &index_path)
|
||||
@@ -405,7 +425,7 @@ impl IndexMapper {
|
||||
// if it's not already there.
|
||||
match index_map.get(&uuid) {
|
||||
Missing => {
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
|
||||
break index_map
|
||||
.create(
|
||||
@@ -432,6 +452,14 @@ impl IndexMapper {
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
/// Returns the path of the index.
|
||||
///
|
||||
/// The folder located at this path is containing the data.mdb,
|
||||
/// the lock.mdb and an optional data.mdb.cpy file.
|
||||
pub fn index_path(&self, uuid: Uuid) -> PathBuf {
|
||||
self.base_path.join(uuid.to_string())
|
||||
}
|
||||
|
||||
pub fn rollback_index(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
@@ -472,7 +500,7 @@ impl IndexMapper {
|
||||
};
|
||||
}
|
||||
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
Index::rollback(milli::heed::EnvOpenOptions::new().read_txn_without_tls(), index_path, to)
|
||||
.map_err(|err| crate::Error::from_milli(err, Some(name.to_string())))
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
run_loop_iteration: _,
|
||||
embedders: _,
|
||||
chat_settings: _,
|
||||
runtime: _,
|
||||
} = scheduler;
|
||||
|
||||
let rtxn = env.read_txn().unwrap();
|
||||
@@ -231,6 +232,7 @@ pub fn snapshot_task(task: &Task) -> String {
|
||||
status,
|
||||
kind,
|
||||
network,
|
||||
custom_metadata,
|
||||
} = task;
|
||||
snap.push('{');
|
||||
snap.push_str(&format!("uid: {uid}, "));
|
||||
@@ -251,6 +253,9 @@ pub fn snapshot_task(task: &Task) -> String {
|
||||
if let Some(network) = network {
|
||||
snap.push_str(&format!("network: {network:?}, "))
|
||||
}
|
||||
if let Some(custom_metadata) = custom_metadata {
|
||||
snap.push_str(&format!("custom_metadata: {custom_metadata:?}"))
|
||||
}
|
||||
|
||||
snap.push('}');
|
||||
snap
|
||||
@@ -317,6 +322,9 @@ fn snapshot_details(d: &Details) -> String {
|
||||
Details::UpgradeDatabase { from, to } => {
|
||||
format!("{{ from: {from:?}, to: {to:?} }}")
|
||||
}
|
||||
Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
|
||||
format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// The main Error type is large and boxing the large variant make the pattern matching fails
|
||||
#![allow(clippy::result_large_err)]
|
||||
|
||||
/*!
|
||||
This crate defines the index scheduler, which is responsible for:
|
||||
1. Keeping references to meilisearch's indexes and mapping them to their
|
||||
@@ -213,6 +216,9 @@ pub struct IndexScheduler {
|
||||
/// A counter that is incremented before every call to [`tick`](IndexScheduler::tick)
|
||||
#[cfg(test)]
|
||||
run_loop_iteration: Arc<RwLock<usize>>,
|
||||
|
||||
/// The tokio runtime used for asynchronous tasks.
|
||||
runtime: Option<tokio::runtime::Handle>,
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
@@ -239,6 +245,7 @@ impl IndexScheduler {
|
||||
run_loop_iteration: self.run_loop_iteration.clone(),
|
||||
features: self.features.clone(),
|
||||
chat_settings: self.chat_settings,
|
||||
runtime: self.runtime.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -252,13 +259,23 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
/// Create an index scheduler and start its run loop.
|
||||
#[allow(private_interfaces)] // because test_utils is private
|
||||
pub fn new(
|
||||
options: IndexSchedulerOptions,
|
||||
auth_env: Env<WithoutTls>,
|
||||
from_db_version: (u32, u32, u32),
|
||||
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
|
||||
#[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
|
||||
runtime: Option<tokio::runtime::Handle>,
|
||||
) -> Result<Self> {
|
||||
let this = Self::new_without_run(options, auth_env, from_db_version, runtime)?;
|
||||
|
||||
this.run();
|
||||
Ok(this)
|
||||
}
|
||||
|
||||
fn new_without_run(
|
||||
options: IndexSchedulerOptions,
|
||||
auth_env: Env<WithoutTls>,
|
||||
from_db_version: (u32, u32, u32),
|
||||
runtime: Option<tokio::runtime::Handle>,
|
||||
) -> Result<Self> {
|
||||
std::fs::create_dir_all(&options.tasks_path)?;
|
||||
std::fs::create_dir_all(&options.update_file_path)?;
|
||||
@@ -313,8 +330,7 @@ impl IndexScheduler {
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
// allow unreachable_code to get rids of the warning in the case of a test build.
|
||||
let this = Self {
|
||||
Ok(Self {
|
||||
processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
|
||||
version,
|
||||
queue,
|
||||
@@ -330,21 +346,38 @@ impl IndexScheduler {
|
||||
webhooks: Arc::new(webhooks),
|
||||
embedders: Default::default(),
|
||||
|
||||
#[cfg(test)]
|
||||
test_breakpoint_sdr,
|
||||
#[cfg(test)]
|
||||
planned_failures,
|
||||
#[cfg(test)] // Will be replaced in `new_tests` in test environments
|
||||
test_breakpoint_sdr: crossbeam_channel::bounded(0).0,
|
||||
#[cfg(test)] // Will be replaced in `new_tests` in test environments
|
||||
planned_failures: Default::default(),
|
||||
#[cfg(test)]
|
||||
run_loop_iteration: Arc::new(RwLock::new(0)),
|
||||
features,
|
||||
chat_settings,
|
||||
};
|
||||
runtime,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create an index scheduler and start its run loop.
|
||||
#[cfg(test)]
|
||||
fn new_test(
|
||||
options: IndexSchedulerOptions,
|
||||
auth_env: Env<WithoutTls>,
|
||||
from_db_version: (u32, u32, u32),
|
||||
runtime: Option<tokio::runtime::Handle>,
|
||||
test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
|
||||
planned_failures: Vec<(usize, test_utils::FailureLocation)>,
|
||||
) -> Result<Self> {
|
||||
let mut this = Self::new_without_run(options, auth_env, from_db_version, runtime)?;
|
||||
|
||||
this.test_breakpoint_sdr = test_breakpoint_sdr;
|
||||
this.planned_failures = planned_failures;
|
||||
|
||||
this.run();
|
||||
Ok(this)
|
||||
}
|
||||
|
||||
fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
|
||||
fn read_txn(&self) -> Result<RoTxn<'_, WithoutTls>> {
|
||||
self.env.read_txn().map_err(|e| e.into())
|
||||
}
|
||||
|
||||
@@ -723,6 +756,19 @@ impl IndexScheduler {
|
||||
kind: KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
self.register_with_custom_metadata(kind, task_id, None, dry_run)
|
||||
}
|
||||
|
||||
/// Register a new task in the scheduler, with metadata.
|
||||
///
|
||||
/// If it fails and data was associated with the task, it tries to delete the associated data.
|
||||
pub fn register_with_custom_metadata(
|
||||
&self,
|
||||
kind: KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
custom_metadata: Option<String>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
|
||||
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
|
||||
@@ -733,7 +779,7 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let task = self.queue.register(&mut wtxn, &kind, task_id, dry_run)?;
|
||||
let task = self.queue.register(&mut wtxn, &kind, task_id, custom_metadata, dry_run)?;
|
||||
|
||||
// If the registered task is a task cancelation
|
||||
// we inform the processing tasks to stop (if necessary).
|
||||
@@ -757,7 +803,7 @@ impl IndexScheduler {
|
||||
|
||||
/// Register a new task coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_task(&mut self) -> Result<Dump> {
|
||||
pub fn register_dumped_task(&mut self) -> Result<Dump<'_>> {
|
||||
Dump::new(self)
|
||||
}
|
||||
|
||||
@@ -806,10 +852,8 @@ impl IndexScheduler {
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(self.rtxn, task_id)
|
||||
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
|
||||
.ok_or_else(|| {
|
||||
io::Error::new(io::ErrorKind::Other, Error::CorruptedTaskQueue)
|
||||
})?;
|
||||
.map_err(io::Error::other)?
|
||||
.ok_or_else(|| io::Error::other(Error::CorruptedTaskQueue))?;
|
||||
|
||||
serde_json::to_writer(&mut self.buffer, &TaskView::from_task(&task))?;
|
||||
self.buffer.push(b'\n');
|
||||
|
||||
@@ -75,6 +75,7 @@ make_enum_progress! {
|
||||
pub enum TaskCancelationProgress {
|
||||
RetrievingTasks,
|
||||
CancelingUpgrade,
|
||||
CleaningCompactionLeftover,
|
||||
UpdatingTasks,
|
||||
}
|
||||
}
|
||||
@@ -138,6 +139,17 @@ make_enum_progress! {
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum IndexCompaction {
|
||||
RetrieveTheIndex,
|
||||
CreateTemporaryFile,
|
||||
CopyAndCompactTheIndex,
|
||||
PersistTheCompactedIndex,
|
||||
CloseTheIndex,
|
||||
ReopenTheIndex,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum InnerSwappingTwoIndexes {
|
||||
RetrieveTheTasks,
|
||||
|
||||
@@ -257,6 +257,7 @@ impl Queue {
|
||||
wtxn: &mut RwTxn,
|
||||
kind: &KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
custom_metadata: Option<String>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
let next_task_id = self.tasks.next_task_id(wtxn)?;
|
||||
@@ -280,6 +281,7 @@ impl Queue {
|
||||
status: Status::Enqueued,
|
||||
kind: kind.clone(),
|
||||
network: None,
|
||||
custom_metadata,
|
||||
};
|
||||
// For deletion and cancelation tasks, we want to make extra sure that they
|
||||
// don't attempt to delete/cancel tasks that are newer than themselves.
|
||||
@@ -310,7 +312,8 @@ impl Queue {
|
||||
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
|
||||
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();
|
||||
|
||||
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
|
||||
let to_delete =
|
||||
RoaringBitmap::from_sorted_iter(finished.into_iter().take(100_000)).unwrap();
|
||||
|
||||
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
|
||||
// the deletion tasks we enqueued ourselves.
|
||||
@@ -326,7 +329,7 @@ impl Queue {
|
||||
);
|
||||
|
||||
// it's safe to unwrap here because we checked the len above
|
||||
let newest_task_id = to_delete.iter().last().unwrap();
|
||||
let newest_task_id = to_delete.iter().next_back().unwrap();
|
||||
let last_task_to_delete =
|
||||
self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
@@ -343,6 +346,7 @@ impl Queue {
|
||||
tasks: to_delete,
|
||||
},
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
)?;
|
||||
|
||||
|
||||
@@ -68,13 +68,14 @@ impl From<KindWithContent> for AutobatchKind {
|
||||
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
|
||||
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
|
||||
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
KindWithContent::IndexCompaction { .. }
|
||||
| KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -287,8 +288,10 @@ impl BatchKind {
|
||||
};
|
||||
|
||||
match (self, autobatch_kind) {
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break((this, BatchStopReason::TaskCannotBeBatched { kind, id })),
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => {
|
||||
Break((this, BatchStopReason::TaskCannotBeBatched { kind, id }))
|
||||
},
|
||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
|
||||
Break((this, BatchStopReason::IndexCreationMismatch { id }))
|
||||
|
||||
@@ -55,6 +55,10 @@ pub(crate) enum Batch {
|
||||
UpgradeDatabase {
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
task: Task,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -66,6 +70,7 @@ pub(crate) enum DocumentOperation {
|
||||
|
||||
/// A [batch](Batch) that combines multiple tasks operating on an index.
|
||||
#[derive(Debug)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub(crate) enum IndexOperation {
|
||||
DocumentOperation {
|
||||
index_uid: String,
|
||||
@@ -109,7 +114,8 @@ impl Batch {
|
||||
| Batch::Dump(task)
|
||||
| Batch::IndexCreation { task, .. }
|
||||
| Batch::Export { task }
|
||||
| Batch::IndexUpdate { task, .. } => {
|
||||
| Batch::IndexUpdate { task, .. }
|
||||
| Batch::IndexCompaction { task, .. } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
Batch::SnapshotCreation(tasks)
|
||||
@@ -154,7 +160,8 @@ impl Batch {
|
||||
IndexOperation { op, .. } => Some(op.index_uid()),
|
||||
IndexCreation { index_uid, .. }
|
||||
| IndexUpdate { index_uid, .. }
|
||||
| IndexDeletion { index_uid, .. } => Some(index_uid),
|
||||
| IndexDeletion { index_uid, .. }
|
||||
| IndexCompaction { index_uid, .. } => Some(index_uid),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -174,6 +181,7 @@ impl fmt::Display for Batch {
|
||||
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
|
||||
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
|
||||
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
|
||||
Batch::IndexCompaction { .. } => f.write_str("IndexCompaction")?,
|
||||
Batch::Export { .. } => f.write_str("Export")?,
|
||||
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
|
||||
};
|
||||
@@ -511,17 +519,33 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 3. we batch the export.
|
||||
// 3. we get the next task to compact
|
||||
let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued;
|
||||
if let Some(task_id) = to_compact.min() {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
|
||||
kind: Kind::IndexCompaction,
|
||||
id: task_id,
|
||||
});
|
||||
let index_uid =
|
||||
task.index_uid().expect("Compaction task must have an index uid").to_owned();
|
||||
return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the export.
|
||||
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
|
||||
if !to_export.is_empty() {
|
||||
let task_id = to_export.iter().next().expect("There must be at least one export task");
|
||||
let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
|
||||
current_batch.processing([&mut task]);
|
||||
current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export });
|
||||
current_batch
|
||||
.reason(BatchStopReason::TaskCannotBeBatched { kind: Kind::Export, id: task_id });
|
||||
return Ok(Some((Batch::Export { task }, current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the snapshot.
|
||||
// 5. we batch the snapshot.
|
||||
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
|
||||
if !to_snapshot.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
|
||||
@@ -531,7 +555,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 5. we batch the dumps.
|
||||
// 6. we batch the dumps.
|
||||
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
|
||||
if let Some(to_dump) = to_dump.min() {
|
||||
let mut task =
|
||||
@@ -544,7 +568,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::Dump(task), current_batch)));
|
||||
}
|
||||
|
||||
// 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
// 7. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
@@ -25,6 +25,7 @@ use convert_case::{Case, Casing as _};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::heed::{Env, WithoutTls};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::update::S3SnapshotOptions;
|
||||
use meilisearch_types::tasks::Status;
|
||||
use process_batch::ProcessBatchInfo;
|
||||
use rayon::current_num_threads;
|
||||
@@ -87,11 +88,14 @@ pub struct Scheduler {
|
||||
|
||||
/// Snapshot compaction status.
|
||||
pub(crate) experimental_no_snapshot_compaction: bool,
|
||||
|
||||
/// S3 Snapshot options.
|
||||
pub(crate) s3_snapshot_options: Option<S3SnapshotOptions>,
|
||||
}
|
||||
|
||||
impl Scheduler {
|
||||
pub(crate) fn private_clone(&self) -> Scheduler {
|
||||
Scheduler {
|
||||
pub(crate) fn private_clone(&self) -> Self {
|
||||
Self {
|
||||
must_stop_processing: self.must_stop_processing.clone(),
|
||||
wake_up: self.wake_up.clone(),
|
||||
autobatching_enabled: self.autobatching_enabled,
|
||||
@@ -103,23 +107,52 @@ impl Scheduler {
|
||||
version_file_path: self.version_file_path.clone(),
|
||||
embedding_cache_cap: self.embedding_cache_cap,
|
||||
experimental_no_snapshot_compaction: self.experimental_no_snapshot_compaction,
|
||||
s3_snapshot_options: self.s3_snapshot_options.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
|
||||
let IndexSchedulerOptions {
|
||||
version_file_path,
|
||||
auth_path: _,
|
||||
tasks_path: _,
|
||||
update_file_path: _,
|
||||
indexes_path: _,
|
||||
snapshots_path,
|
||||
dumps_path,
|
||||
cli_webhook_url: _,
|
||||
cli_webhook_authorization: _,
|
||||
task_db_size: _,
|
||||
index_base_map_size: _,
|
||||
enable_mdb_writemap: _,
|
||||
index_growth_amount: _,
|
||||
index_count: _,
|
||||
indexer_config,
|
||||
autobatching_enabled,
|
||||
cleanup_enabled: _,
|
||||
max_number_of_tasks: _,
|
||||
max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit,
|
||||
instance_features: _,
|
||||
auto_upgrade: _,
|
||||
embedding_cache_cap,
|
||||
experimental_no_snapshot_compaction,
|
||||
} = options;
|
||||
|
||||
Scheduler {
|
||||
must_stop_processing: MustStopProcessing::default(),
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||
autobatching_enabled: options.autobatching_enabled,
|
||||
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: options.batched_tasks_size_limit,
|
||||
dumps_path: options.dumps_path.clone(),
|
||||
snapshots_path: options.snapshots_path.clone(),
|
||||
autobatching_enabled: *autobatching_enabled,
|
||||
max_number_of_batched_tasks: *max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: *batched_tasks_size_limit,
|
||||
dumps_path: dumps_path.clone(),
|
||||
snapshots_path: snapshots_path.clone(),
|
||||
auth_env,
|
||||
version_file_path: options.version_file_path.clone(),
|
||||
embedding_cache_cap: options.embedding_cache_cap,
|
||||
experimental_no_snapshot_compaction: options.experimental_no_snapshot_compaction,
|
||||
version_file_path: version_file_path.clone(),
|
||||
embedding_cache_cap: *embedding_cache_cap,
|
||||
experimental_no_snapshot_compaction: *experimental_no_snapshot_compaction,
|
||||
s3_snapshot_options: indexer_config.s3_snapshot_options.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,22 +1,27 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::fs::{remove_file, File};
|
||||
use std::io::{ErrorKind, Seek, SeekFrom};
|
||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use byte_unit::Byte;
|
||||
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::heed::CompactionOption;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::{self, ChannelCongestion};
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use milli::update::Settings as MilliSettings;
|
||||
use roaring::RoaringBitmap;
|
||||
use tempfile::{PersistError, TempPath};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::create_batch::Batch;
|
||||
use crate::processing::{
|
||||
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
|
||||
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
|
||||
UpdateIndexProgress,
|
||||
IndexCompaction, InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress,
|
||||
TaskDeletionProgress, UpdateIndexProgress,
|
||||
};
|
||||
use crate::utils::{
|
||||
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
|
||||
@@ -24,6 +29,9 @@ use crate::utils::{
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result, TaskId};
|
||||
|
||||
/// The name of the copy of the data.mdb file used during compaction.
|
||||
const DATA_MDB_COPY_NAME: &str = "data.mdb.cpy";
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ProcessBatchInfo {
|
||||
/// The write channel congestion. None when unavailable: settings update.
|
||||
@@ -418,6 +426,47 @@ impl IndexScheduler {
|
||||
task.status = Status::Succeeded;
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::IndexCompaction { index_uid: _, mut task } => {
|
||||
let KindWithContent::IndexCompaction { index_uid } = &task.kind else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let ret = catch_unwind(AssertUnwindSafe(|| {
|
||||
self.apply_compaction(&rtxn, &progress, index_uid)
|
||||
}));
|
||||
|
||||
let (pre_size, post_size) = match ret {
|
||||
Ok(Ok(stats)) => stats,
|
||||
Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask),
|
||||
Ok(Err(e)) => return Err(e),
|
||||
Err(e) => {
|
||||
let msg = match e.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match e.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
return Err(Error::Export(Box::new(Error::ProcessBatchPanicked(
|
||||
msg.to_string(),
|
||||
))));
|
||||
}
|
||||
};
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
if let Some(Details::IndexCompaction {
|
||||
index_uid: _,
|
||||
pre_compaction_size,
|
||||
post_compaction_size,
|
||||
}) = task.details.as_mut()
|
||||
{
|
||||
*pre_compaction_size = Some(Byte::from_u64(pre_size));
|
||||
*post_compaction_size = Some(Byte::from_u64(post_size));
|
||||
}
|
||||
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::Export { mut task } => {
|
||||
let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind
|
||||
else {
|
||||
@@ -493,6 +542,92 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_compaction(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
progress: &Progress,
|
||||
index_uid: &str,
|
||||
) -> Result<(u64, u64)> {
|
||||
// 1. Verify that the index exists
|
||||
if !self.index_mapper.index_exists(rtxn, index_uid)? {
|
||||
return Err(Error::IndexNotFound(index_uid.to_owned()));
|
||||
}
|
||||
|
||||
// 2. We retrieve the index and create a temporary file in the index directory
|
||||
progress.update_progress(IndexCompaction::RetrieveTheIndex);
|
||||
let index = self.index_mapper.index(rtxn, index_uid)?;
|
||||
|
||||
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
|
||||
self.index_mapper
|
||||
.set_currently_updating_index(Some((index_uid.to_string(), index.clone())));
|
||||
|
||||
progress.update_progress(IndexCompaction::CreateTemporaryFile);
|
||||
let src_path = index.path().join("data.mdb");
|
||||
let pre_size = std::fs::metadata(&src_path)?.len();
|
||||
|
||||
let dst_path = TempPath::from_path(index.path().join(DATA_MDB_COPY_NAME));
|
||||
let file = File::create(&dst_path)?;
|
||||
let mut file = tempfile::NamedTempFile::from_parts(file, dst_path);
|
||||
|
||||
// 3. We copy the index data to the temporary file
|
||||
progress.update_progress(IndexCompaction::CopyAndCompactTheIndex);
|
||||
index
|
||||
.copy_to_file(file.as_file_mut(), CompactionOption::Enabled)
|
||||
.map_err(|error| Error::Milli { error, index_uid: Some(index_uid.to_string()) })?;
|
||||
// ...and reset the file position as specified in the documentation
|
||||
file.seek(SeekFrom::Start(0))?;
|
||||
|
||||
// 4. We replace the index data file with the temporary file
|
||||
progress.update_progress(IndexCompaction::PersistTheCompactedIndex);
|
||||
match file.persist(src_path) {
|
||||
Ok(file) => file.sync_all()?,
|
||||
// TODO see if we have a _resource busy_ error and probably handle this by:
|
||||
// 1. closing the index, 2. replacing and 3. reopening it
|
||||
Err(PersistError { error, file: _ }) => return Err(Error::IoError(error)),
|
||||
};
|
||||
|
||||
// 5. Prepare to close the index
|
||||
progress.update_progress(IndexCompaction::CloseTheIndex);
|
||||
|
||||
// unmark that the index is the processing one so we don't keep a handle to it, preventing its closing
|
||||
self.index_mapper.set_currently_updating_index(None);
|
||||
|
||||
self.index_mapper.close_index(rtxn, index_uid)?;
|
||||
drop(index);
|
||||
|
||||
progress.update_progress(IndexCompaction::ReopenTheIndex);
|
||||
// 6. Reopen the index
|
||||
// The index will use the compacted data file when being reopened
|
||||
let index = self.index_mapper.index(rtxn, index_uid)?;
|
||||
|
||||
// if the update processed successfully, we're going to store the new
|
||||
// stats of the index. Since the tasks have already been processed and
|
||||
// this is a non-critical operation. If it fails, we should not fail
|
||||
// the entire batch.
|
||||
let res = || -> Result<_> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, index_uid, &stats)?;
|
||||
wtxn.commit()?;
|
||||
Ok(stats.database_size)
|
||||
}();
|
||||
|
||||
let post_size = match res {
|
||||
Ok(post_size) => post_size,
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
);
|
||||
0
|
||||
}
|
||||
};
|
||||
|
||||
Ok((pre_size, post_size))
|
||||
}
|
||||
|
||||
/// Swap the index `lhs` with the index `rhs`.
|
||||
fn apply_index_swap(
|
||||
&self,
|
||||
@@ -780,9 +915,10 @@ impl IndexScheduler {
|
||||
|
||||
let enqueued_tasks = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
|
||||
|
||||
// 0. Check if any upgrade task was matched.
|
||||
// 0. Check if any upgrade or compaction tasks were matched.
|
||||
// If so, we cancel all the failed or enqueued upgrade tasks.
|
||||
let upgrade_tasks = &self.queue.tasks.get_kind(rtxn, Kind::UpgradeDatabase)?;
|
||||
let compaction_tasks = &self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)?;
|
||||
let is_canceling_upgrade = !matched_tasks.is_disjoint(upgrade_tasks);
|
||||
if is_canceling_upgrade {
|
||||
let failed_tasks = self.queue.tasks.get_status(rtxn, Status::Failed)?;
|
||||
@@ -847,7 +983,33 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
// 3. We now have a list of tasks to cancel, cancel them
|
||||
// 3. If we are cancelling a compaction task, remove the tempfiles after incomplete compactions
|
||||
for compaction_task in &tasks_to_cancel & compaction_tasks {
|
||||
progress.update_progress(TaskCancelationProgress::CleaningCompactionLeftover);
|
||||
let task = self.queue.tasks.get_task(rtxn, compaction_task)?.unwrap();
|
||||
let Some(Details::IndexCompaction {
|
||||
index_uid,
|
||||
pre_compaction_size: _,
|
||||
post_compaction_size: _,
|
||||
}) = task.details
|
||||
else {
|
||||
unreachable!("wrong details for compaction task {compaction_task}")
|
||||
};
|
||||
|
||||
let index_path = match self.index_mapper.index_mapping.get(rtxn, &index_uid)? {
|
||||
Some(index_uuid) => self.index_mapper.index_path(index_uuid),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
if let Err(e) = remove_file(index_path.join(DATA_MDB_COPY_NAME)) {
|
||||
match e.kind() {
|
||||
ErrorKind::NotFound => (),
|
||||
_ => return Err(Error::IoError(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. We now have a list of tasks to cancel, cancel them
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
progress.update_progress(progress_obj);
|
||||
|
||||
|
||||
@@ -370,7 +370,7 @@ fn ureq_error_into_error(error: ureq::Error) -> Error {
|
||||
}
|
||||
Err(e) => e.into(),
|
||||
},
|
||||
ureq::Error::Transport(transport) => io::Error::new(io::ErrorKind::Other, transport).into(),
|
||||
ureq::Error::Transport(transport) => io::Error::other(transport).into(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@ use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
|
||||
use crate::queue::TaskQueue;
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
const UPDATE_FILES_DIR_NAME: &str = "update_files";
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// See [`EnvOpenOptions::open`].
|
||||
@@ -78,10 +80,32 @@ impl IndexScheduler {
|
||||
pub(super) fn process_snapshot(
|
||||
&self,
|
||||
progress: Progress,
|
||||
mut tasks: Vec<Task>,
|
||||
tasks: Vec<Task>,
|
||||
) -> Result<Vec<Task>> {
|
||||
progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
|
||||
|
||||
match self.scheduler.s3_snapshot_options.clone() {
|
||||
Some(options) => {
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
let _ = options;
|
||||
panic!("Non-unix platform does not support S3 snapshotting");
|
||||
}
|
||||
#[cfg(unix)]
|
||||
self.runtime
|
||||
.as_ref()
|
||||
.expect("Runtime not initialized")
|
||||
.block_on(self.process_snapshot_to_s3(progress, options, tasks))
|
||||
}
|
||||
None => self.process_snapshots_to_disk(progress, tasks),
|
||||
}
|
||||
}
|
||||
|
||||
fn process_snapshots_to_disk(
|
||||
&self,
|
||||
progress: Progress,
|
||||
mut tasks: Vec<Task>,
|
||||
) -> Result<Vec<Task>, Error> {
|
||||
fs::create_dir_all(&self.scheduler.snapshots_path)?;
|
||||
let temp_snapshot_dir = tempfile::tempdir()?;
|
||||
|
||||
@@ -128,7 +152,7 @@ impl IndexScheduler {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
// 2.4 Create the update files directory
|
||||
let update_files_dir = temp_snapshot_dir.path().join("update_files");
|
||||
let update_files_dir = temp_snapshot_dir.path().join(UPDATE_FILES_DIR_NAME);
|
||||
fs::create_dir_all(&update_files_dir)?;
|
||||
|
||||
// 2.5 Only copy the update files of the enqueued tasks
|
||||
@@ -140,7 +164,7 @@ impl IndexScheduler {
|
||||
let task =
|
||||
self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Some(content_uuid) = task.content_uuid() {
|
||||
let src = self.queue.file_store.get_update_path(content_uuid);
|
||||
let src = self.queue.file_store.update_path(content_uuid);
|
||||
let dst = update_files_dir.join(content_uuid.to_string());
|
||||
fs::copy(src, dst)?;
|
||||
}
|
||||
@@ -206,4 +230,403 @@ impl IndexScheduler {
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub(super) async fn process_snapshot_to_s3(
|
||||
&self,
|
||||
progress: Progress,
|
||||
opts: meilisearch_types::milli::update::S3SnapshotOptions,
|
||||
mut tasks: Vec<Task>,
|
||||
) -> Result<Vec<Task>> {
|
||||
use meilisearch_types::milli::update::S3SnapshotOptions;
|
||||
|
||||
let S3SnapshotOptions {
|
||||
s3_bucket_url,
|
||||
s3_bucket_region,
|
||||
s3_bucket_name,
|
||||
s3_snapshot_prefix,
|
||||
s3_access_key,
|
||||
s3_secret_key,
|
||||
s3_max_in_flight_parts,
|
||||
s3_compression_level: level,
|
||||
s3_signature_duration,
|
||||
s3_multipart_part_size,
|
||||
} = opts;
|
||||
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
let retry_backoff = backoff::ExponentialBackoff::default();
|
||||
let db_name = {
|
||||
let mut base_path = self.env.path().to_owned();
|
||||
base_path.pop();
|
||||
base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms").to_string()
|
||||
};
|
||||
|
||||
let (reader, writer) = std::io::pipe()?;
|
||||
let uploader_task = tokio::spawn(multipart_stream_to_s3(
|
||||
s3_bucket_url,
|
||||
s3_bucket_region,
|
||||
s3_bucket_name,
|
||||
s3_snapshot_prefix,
|
||||
s3_access_key,
|
||||
s3_secret_key,
|
||||
s3_max_in_flight_parts,
|
||||
s3_signature_duration,
|
||||
s3_multipart_part_size,
|
||||
must_stop_processing,
|
||||
retry_backoff,
|
||||
db_name,
|
||||
reader,
|
||||
));
|
||||
|
||||
let index_scheduler = IndexScheduler::private_clone(self);
|
||||
let builder_task = tokio::task::spawn_blocking(move || {
|
||||
stream_tarball_into_pipe(progress, level, writer, index_scheduler)
|
||||
});
|
||||
|
||||
let (uploader_result, builder_result) = tokio::join!(uploader_task, builder_task);
|
||||
|
||||
// Check uploader result first to early return on task abortion.
|
||||
// safety: JoinHandle can return an error if the task was aborted, cancelled, or panicked.
|
||||
uploader_result.unwrap()?;
|
||||
builder_result.unwrap()?;
|
||||
|
||||
for task in &mut tasks {
|
||||
task.status = Status::Succeeded;
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
}
|
||||
|
||||
/// Streams a tarball of the database content into a pipe.
|
||||
#[cfg(unix)]
|
||||
fn stream_tarball_into_pipe(
|
||||
progress: Progress,
|
||||
level: u32,
|
||||
writer: std::io::PipeWriter,
|
||||
index_scheduler: IndexScheduler,
|
||||
) -> std::result::Result<(), Error> {
|
||||
use std::io::Write as _;
|
||||
use std::path::Path;
|
||||
|
||||
let writer = flate2::write::GzEncoder::new(writer, flate2::Compression::new(level));
|
||||
let mut tarball = tar::Builder::new(writer);
|
||||
|
||||
// 1. Snapshot the version file
|
||||
tarball
|
||||
.append_path_with_name(&index_scheduler.scheduler.version_file_path, VERSION_FILE_NAME)?;
|
||||
|
||||
// 2. Snapshot the index scheduler LMDB env
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
|
||||
let tasks_env_file = index_scheduler.env.try_clone_inner_file()?;
|
||||
let path = Path::new("tasks").join("data.mdb");
|
||||
append_file_to_tarball(&mut tarball, path, tasks_env_file)?;
|
||||
|
||||
// 2.3 Create a read transaction on the index-scheduler
|
||||
let rtxn = index_scheduler.env.read_txn()?;
|
||||
|
||||
// 2.4 Create the update files directory
|
||||
// And only copy the update files of the enqueued tasks
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
|
||||
let enqueued = index_scheduler.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
|
||||
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
|
||||
progress.update_progress(update_file_progress);
|
||||
|
||||
// We create the update_files directory so that it
|
||||
// always exists even if there are no update files
|
||||
let update_files_dir = Path::new(UPDATE_FILES_DIR_NAME);
|
||||
let src_update_files_dir = {
|
||||
let mut path = index_scheduler.env.path().to_path_buf();
|
||||
path.pop();
|
||||
path.join(UPDATE_FILES_DIR_NAME)
|
||||
};
|
||||
tarball.append_dir(update_files_dir, src_update_files_dir)?;
|
||||
|
||||
for task_id in enqueued {
|
||||
let task = index_scheduler
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&rtxn, task_id)?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Some(content_uuid) = task.content_uuid() {
|
||||
use std::fs::File;
|
||||
|
||||
let src = index_scheduler.queue.file_store.update_path(content_uuid);
|
||||
let mut update_file = File::open(src)?;
|
||||
let path = update_files_dir.join(content_uuid.to_string());
|
||||
tarball.append_file(path, &mut update_file)?;
|
||||
}
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 3. Snapshot every indexes
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
|
||||
let index_mapping = index_scheduler.index_mapper.index_mapping;
|
||||
let nb_indexes = index_mapping.len(&rtxn)? as u32;
|
||||
let indexes_dir = Path::new("indexes");
|
||||
let indexes_references: Vec<_> = index_scheduler
|
||||
.index_mapper
|
||||
.index_mapping
|
||||
.iter(&rtxn)?
|
||||
.map(|res| res.map_err(Error::from).map(|(name, uuid)| (name.to_string(), uuid)))
|
||||
.collect::<Result<_, Error>>()?;
|
||||
|
||||
// It's prettier to use a for loop instead of the IndexMapper::try_for_each_index
|
||||
// method, especially when we need to access the UUID, local path and index number.
|
||||
for (i, (name, uuid)) in indexes_references.into_iter().enumerate() {
|
||||
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
|
||||
&name, i as u32, nb_indexes,
|
||||
));
|
||||
let path = indexes_dir.join(uuid.to_string()).join("data.mdb");
|
||||
let index = index_scheduler.index_mapper.index(&rtxn, &name)?;
|
||||
let index_file = index.try_clone_inner_file()?;
|
||||
tracing::trace!("Appending index file for {name} in {}", path.display());
|
||||
append_file_to_tarball(&mut tarball, path, index_file)?;
|
||||
}
|
||||
|
||||
drop(rtxn);
|
||||
|
||||
// 4. Snapshot the auth LMDB env
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
|
||||
let auth_env_file = index_scheduler.scheduler.auth_env.try_clone_inner_file()?;
|
||||
let path = Path::new("auth").join("data.mdb");
|
||||
append_file_to_tarball(&mut tarball, path, auth_env_file)?;
|
||||
|
||||
let mut gzencoder = tarball.into_inner()?;
|
||||
gzencoder.flush()?;
|
||||
gzencoder.try_finish()?;
|
||||
let mut writer = gzencoder.finish()?;
|
||||
writer.flush()?;
|
||||
|
||||
Result::<_, Error>::Ok(())
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn append_file_to_tarball<W, P>(
|
||||
tarball: &mut tar::Builder<W>,
|
||||
path: P,
|
||||
mut auth_env_file: fs::File,
|
||||
) -> Result<(), Error>
|
||||
where
|
||||
W: std::io::Write,
|
||||
P: AsRef<std::path::Path>,
|
||||
{
|
||||
use std::io::{Seek as _, SeekFrom};
|
||||
|
||||
// Note: A previous snapshot operation may have left the cursor
|
||||
// at the end of the file so we need to seek to the start.
|
||||
auth_env_file.seek(SeekFrom::Start(0))?;
|
||||
tarball.append_file(path, &mut auth_env_file)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Streams the content read from the given reader to S3.
|
||||
#[cfg(unix)]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn multipart_stream_to_s3(
|
||||
s3_bucket_url: String,
|
||||
s3_bucket_region: String,
|
||||
s3_bucket_name: String,
|
||||
s3_snapshot_prefix: String,
|
||||
s3_access_key: String,
|
||||
s3_secret_key: String,
|
||||
s3_max_in_flight_parts: std::num::NonZero<usize>,
|
||||
s3_signature_duration: std::time::Duration,
|
||||
s3_multipart_part_size: u64,
|
||||
must_stop_processing: super::MustStopProcessing,
|
||||
retry_backoff: backoff::exponential::ExponentialBackoff<backoff::SystemClock>,
|
||||
db_name: String,
|
||||
reader: std::io::PipeReader,
|
||||
) -> Result<(), Error> {
|
||||
use std::{collections::VecDeque, os::fd::OwnedFd, path::PathBuf};
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use reqwest::{Client, Response};
|
||||
use rusty_s3::S3Action as _;
|
||||
use rusty_s3::{actions::CreateMultipartUpload, Bucket, BucketError, Credentials, UrlStyle};
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
let reader = OwnedFd::from(reader);
|
||||
let reader = tokio::net::unix::pipe::Receiver::from_owned_fd(reader)?;
|
||||
let s3_snapshot_prefix = PathBuf::from(s3_snapshot_prefix);
|
||||
let url =
|
||||
s3_bucket_url.parse().map_err(BucketError::ParseError).map_err(Error::S3BucketError)?;
|
||||
let bucket = Bucket::new(url, UrlStyle::Path, s3_bucket_name, s3_bucket_region)
|
||||
.map_err(Error::S3BucketError)?;
|
||||
let credential = Credentials::new(s3_access_key, s3_secret_key);
|
||||
|
||||
// Note for the future (rust 1.91+): use with_added_extension, it's prettier
|
||||
let object_path = s3_snapshot_prefix.join(format!("{db_name}.snapshot"));
|
||||
// Note: It doesn't work on Windows and if a port to this platform is needed,
|
||||
// use the slash-path crate or similar to get the correct path separator.
|
||||
let object = object_path.display().to_string();
|
||||
|
||||
let action = bucket.create_multipart_upload(Some(&credential), &object);
|
||||
let url = action.sign(s3_signature_duration);
|
||||
|
||||
let client = Client::new();
|
||||
let resp = client.post(url).send().await.map_err(Error::S3HttpError)?;
|
||||
let status = resp.status();
|
||||
|
||||
let body = match resp.error_for_status_ref() {
|
||||
Ok(_) => resp.text().await.map_err(Error::S3HttpError)?,
|
||||
Err(_) => {
|
||||
return Err(Error::S3Error { status, body: resp.text().await.unwrap_or_default() })
|
||||
}
|
||||
};
|
||||
|
||||
let multipart =
|
||||
CreateMultipartUpload::parse_response(&body).map_err(|e| Error::S3XmlError(Box::new(e)))?;
|
||||
tracing::debug!("Starting the upload of the snapshot to {object}");
|
||||
|
||||
// We use this bumpalo for etags strings.
|
||||
let bump = bumpalo::Bump::new();
|
||||
let mut etags = Vec::<&str>::new();
|
||||
let mut in_flight = VecDeque::<(JoinHandle<reqwest::Result<Response>>, Bytes)>::with_capacity(
|
||||
s3_max_in_flight_parts.get(),
|
||||
);
|
||||
|
||||
// Part numbers start at 1 and cannot be larger than 10k
|
||||
for part_number in 1u16.. {
|
||||
if must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let part_upload =
|
||||
bucket.upload_part(Some(&credential), &object, part_number, multipart.upload_id());
|
||||
let url = part_upload.sign(s3_signature_duration);
|
||||
|
||||
// Wait for a buffer to be ready if there are in-flight parts that landed
|
||||
let mut buffer = if in_flight.len() >= s3_max_in_flight_parts.get() {
|
||||
let (handle, buffer) = in_flight.pop_front().expect("At least one in flight request");
|
||||
let resp = join_and_map_error(handle).await?;
|
||||
extract_and_append_etag(&bump, &mut etags, resp.headers())?;
|
||||
|
||||
let mut buffer = match buffer.try_into_mut() {
|
||||
Ok(buffer) => buffer,
|
||||
Err(_) => unreachable!("All bytes references were consumed in the task"),
|
||||
};
|
||||
buffer.clear();
|
||||
buffer
|
||||
} else {
|
||||
BytesMut::with_capacity(s3_multipart_part_size as usize)
|
||||
};
|
||||
|
||||
// If we successfully read enough bytes,
|
||||
// we can continue and send the buffer/part
|
||||
while buffer.len() < (s3_multipart_part_size as usize / 2) {
|
||||
// Wait for the pipe to be readable
|
||||
|
||||
use std::io;
|
||||
reader.readable().await?;
|
||||
|
||||
match reader.try_read_buf(&mut buffer) {
|
||||
Ok(0) => break,
|
||||
// We read some bytes but maybe not enough
|
||||
Ok(_) => continue,
|
||||
// The readiness event is a false positive.
|
||||
Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => continue,
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
}
|
||||
|
||||
if buffer.is_empty() {
|
||||
// Break the loop if the buffer is
|
||||
// empty after we tried to read bytes
|
||||
break;
|
||||
}
|
||||
|
||||
let body = buffer.freeze();
|
||||
tracing::trace!("Sending part {part_number}");
|
||||
let task = tokio::spawn({
|
||||
let client = client.clone();
|
||||
let body = body.clone();
|
||||
backoff::future::retry(retry_backoff.clone(), move || {
|
||||
let client = client.clone();
|
||||
let url = url.clone();
|
||||
let body = body.clone();
|
||||
async move {
|
||||
match client.put(url).body(body).send().await {
|
||||
Ok(resp) if resp.status().is_client_error() => {
|
||||
resp.error_for_status().map_err(backoff::Error::Permanent)
|
||||
}
|
||||
Ok(resp) => Ok(resp),
|
||||
Err(e) => Err(backoff::Error::transient(e)),
|
||||
}
|
||||
}
|
||||
})
|
||||
});
|
||||
in_flight.push_back((task, body));
|
||||
}
|
||||
|
||||
for (handle, _buffer) in in_flight {
|
||||
let resp = join_and_map_error(handle).await?;
|
||||
extract_and_append_etag(&bump, &mut etags, resp.headers())?;
|
||||
}
|
||||
|
||||
tracing::debug!("Finalizing the multipart upload");
|
||||
|
||||
let action = bucket.complete_multipart_upload(
|
||||
Some(&credential),
|
||||
&object,
|
||||
multipart.upload_id(),
|
||||
etags.iter().map(AsRef::as_ref),
|
||||
);
|
||||
let url = action.sign(s3_signature_duration);
|
||||
let body = action.body();
|
||||
let resp = backoff::future::retry(retry_backoff, move || {
|
||||
let client = client.clone();
|
||||
let url = url.clone();
|
||||
let body = body.clone();
|
||||
async move {
|
||||
match client.post(url).body(body).send().await {
|
||||
Ok(resp) if resp.status().is_client_error() => {
|
||||
resp.error_for_status().map_err(backoff::Error::Permanent)
|
||||
}
|
||||
Ok(resp) => Ok(resp),
|
||||
Err(e) => Err(backoff::Error::transient(e)),
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(Error::S3HttpError)?;
|
||||
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;
|
||||
if status.is_success() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Error::S3Error { status, body })
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
async fn join_and_map_error(
|
||||
join_handle: tokio::task::JoinHandle<Result<reqwest::Response, reqwest::Error>>,
|
||||
) -> Result<reqwest::Response> {
|
||||
// safety: Panic happens if the task (JoinHandle) was aborted, cancelled, or panicked
|
||||
let request = join_handle.await.unwrap();
|
||||
let resp = request.map_err(Error::S3HttpError)?;
|
||||
match resp.error_for_status_ref() {
|
||||
Ok(_) => Ok(resp),
|
||||
Err(_) => Err(Error::S3Error {
|
||||
status: resp.status(),
|
||||
body: resp.text().await.unwrap_or_default(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn extract_and_append_etag<'b>(
|
||||
bump: &'b bumpalo::Bump,
|
||||
etags: &mut Vec<&'b str>,
|
||||
headers: &reqwest::header::HeaderMap,
|
||||
) -> Result<()> {
|
||||
use reqwest::header::ETAG;
|
||||
|
||||
let etag = headers.get(ETAG).ok_or_else(|| Error::S3XmlError("Missing ETag header".into()))?;
|
||||
let etag = etag.to_str().map_err(|e| Error::S3XmlError(Box::new(e)))?;
|
||||
etags.push(bump.alloc_str(etag));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
@@ -37,7 +37,7 @@ catto [1,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
----------------------------------------------------------------------
|
||||
@@ -40,7 +40,7 @@ doggo [2,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -43,7 +43,7 @@ doggo [2,3,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -722,7 +722,7 @@ fn basic_get_stats() {
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
@@ -742,6 +742,7 @@ fn basic_get_stats() {
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCompaction": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -753,10 +754,10 @@ fn basic_get_stats() {
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
@@ -776,6 +777,7 @@ fn basic_get_stats() {
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCompaction": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -787,7 +789,7 @@ fn basic_get_stats() {
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
@@ -797,7 +799,7 @@ fn basic_get_stats() {
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
@@ -817,6 +819,7 @@ fn basic_get_stats() {
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCompaction": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -828,7 +831,7 @@ fn basic_get_stats() {
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
@@ -839,7 +842,7 @@ fn basic_get_stats() {
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
@@ -859,6 +862,7 @@ fn basic_get_stats() {
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCompaction": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -870,7 +874,7 @@ fn basic_get_stats() {
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -126,7 +126,7 @@ impl IndexScheduler {
|
||||
std::fs::create_dir_all(&options.auth_path).unwrap();
|
||||
let auth_env = open_auth_store_env(&options.auth_path).unwrap();
|
||||
let index_scheduler =
|
||||
Self::new(options, auth_env, version, sender, planned_failures).unwrap();
|
||||
Self::new_test(options, auth_env, version, None, sender, planned_failures).unwrap();
|
||||
|
||||
// To be 100% consistent between all test we're going to start the scheduler right now
|
||||
// and ensure it's in the expected starting state.
|
||||
|
||||
@@ -45,6 +45,11 @@ pub fn upgrade_index_scheduler(
|
||||
(1, 19, _) => 0,
|
||||
(1, 20, _) => 0,
|
||||
(1, 21, _) => 0,
|
||||
(1, 22, _) => 0,
|
||||
(1, 23, _) => 0,
|
||||
(1, 24, _) => 0,
|
||||
(1, 25, _) => 0,
|
||||
(1, 26, _) => 0,
|
||||
(major, minor, patch) => {
|
||||
if major > current_major
|
||||
|| (major == current_major && minor > current_minor)
|
||||
@@ -95,6 +100,7 @@ pub fn upgrade_index_scheduler(
|
||||
status: Status::Enqueued,
|
||||
kind: KindWithContent::UpgradeDatabase { from },
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
},
|
||||
)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
@@ -256,14 +256,15 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
|
||||
use KindWithContent as K;
|
||||
let mut index_uids = vec![];
|
||||
match &mut task.kind {
|
||||
K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentEdition { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentClear { index_uid } => index_uids.push(index_uid),
|
||||
K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::IndexDeletion { index_uid } => index_uids.push(index_uid),
|
||||
K::IndexCreation { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentAdditionOrUpdate { index_uid, .. }
|
||||
| K::DocumentEdition { index_uid, .. }
|
||||
| K::DocumentDeletion { index_uid, .. }
|
||||
| K::DocumentDeletionByFilter { index_uid, .. }
|
||||
| K::DocumentClear { index_uid }
|
||||
| K::SettingsUpdate { index_uid, .. }
|
||||
| K::IndexDeletion { index_uid }
|
||||
| K::IndexCreation { index_uid, .. }
|
||||
| K::IndexCompaction { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::IndexUpdate { index_uid, new_index_uid, .. } => {
|
||||
index_uids.push(index_uid);
|
||||
if let Some(new_uid) = new_index_uid {
|
||||
@@ -378,6 +379,7 @@ impl crate::IndexScheduler {
|
||||
status,
|
||||
kind,
|
||||
network: _,
|
||||
custom_metadata: _,
|
||||
} = task;
|
||||
assert_eq!(uid, task.uid);
|
||||
if task.status != Status::Enqueued {
|
||||
@@ -618,6 +620,13 @@ impl crate::IndexScheduler {
|
||||
Details::UpgradeDatabase { from: _, to: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
|
||||
}
|
||||
Details::IndexCompaction {
|
||||
index_uid: _,
|
||||
pre_compaction_size: _,
|
||||
post_compaction_size: _,
|
||||
} => {
|
||||
assert_eq!(kind.as_kind(), Kind::IndexCompaction);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ impl<'a> BytesDecode<'a> for UuidCodec {
|
||||
impl BytesEncode<'_> for UuidCodec {
|
||||
type EItem = Uuid;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item.as_bytes()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -271,9 +271,10 @@ macro_rules! json_string {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate as meili_snap;
|
||||
use crate::UUID_IN_MESSAGE_RE;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[test]
|
||||
fn snap() {
|
||||
|
||||
@@ -109,6 +109,7 @@ impl HeedAuthStore {
|
||||
Action::IndexesGet,
|
||||
Action::IndexesUpdate,
|
||||
Action::IndexesSwap,
|
||||
Action::IndexesCompact,
|
||||
]
|
||||
.iter(),
|
||||
);
|
||||
@@ -315,7 +316,9 @@ impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
|
||||
impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec {
|
||||
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
|
||||
|
||||
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode(
|
||||
(key_id, action, index): &'_ Self::EItem,
|
||||
) -> StdResult<Cow<'_, [u8]>, BoxedError> {
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
bytes.extend_from_slice(key_id.as_bytes());
|
||||
|
||||
@@ -5,6 +5,7 @@ use actix_web::{self as aweb, HttpResponseBuilder};
|
||||
use aweb::http::header;
|
||||
use aweb::rt::task::JoinError;
|
||||
use convert_case::Casing;
|
||||
use milli::cellulite;
|
||||
use milli::heed::{Error as HeedError, MdbError};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
@@ -239,6 +240,7 @@ InconsistentDocumentChangeHeaders , InvalidRequest , BAD_REQU
|
||||
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentGeojsonField , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidHeaderValue , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -252,10 +254,12 @@ InvalidSearchHybridQuery , InvalidRequest , BAD_REQU
|
||||
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexCustomMetadata , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchQueryPersonalization , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -313,6 +317,8 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
|
||||
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchPersonalize , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchPersonalizeUserContext , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -388,6 +394,9 @@ TooManyVectors , InvalidRequest , BAD_REQU
|
||||
UnretrievableDocument , Internal , BAD_REQUEST ;
|
||||
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
|
||||
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
|
||||
InvalidS3SnapshotRequest , Internal , BAD_REQUEST ;
|
||||
InvalidS3SnapshotParameters , Internal , BAD_REQUEST ;
|
||||
S3SnapshotServerError , Internal , BAD_GATEWAY ;
|
||||
|
||||
// Experimental features
|
||||
VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -501,7 +510,9 @@ impl ErrorCode for milli::Error {
|
||||
Code::InvalidFacetSearchFacetName
|
||||
}
|
||||
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
|
||||
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
|
||||
UserError::InvalidGeoField { .. } | UserError::GeoJsonError(_) => {
|
||||
Code::InvalidDocumentGeoField
|
||||
}
|
||||
UserError::InvalidVectorDimensions { .. }
|
||||
| UserError::InvalidIndexingVectorDimensions { .. } => {
|
||||
Code::InvalidVectorDimensions
|
||||
@@ -525,6 +536,17 @@ impl ErrorCode for milli::Error {
|
||||
| UserError::DocumentEditionCompilationError(_) => {
|
||||
Code::EditDocumentsByFunctionError
|
||||
}
|
||||
UserError::CelluliteError(err) => match err {
|
||||
cellulite::Error::BuildCanceled
|
||||
| cellulite::Error::VersionMismatchOnBuild(_)
|
||||
| cellulite::Error::DatabaseDoesntExists
|
||||
| cellulite::Error::Heed(_)
|
||||
| cellulite::Error::InvalidGeometry(_)
|
||||
| cellulite::Error::InternalDocIdMissing(_, _)
|
||||
| cellulite::Error::CannotConvertLineToCell(_, _, _) => Code::Internal,
|
||||
cellulite::Error::InvalidGeoJson(_) => Code::InvalidDocumentGeojsonField,
|
||||
},
|
||||
UserError::MalformedGeojson(_) => Code::InvalidDocumentGeojsonField,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -664,6 +686,18 @@ impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidSearchPersonalize {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "the value of `personalize` is invalid, expected a JSON object with `userContext` string.")
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidSearchPersonalizeUserContext {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "the value of `userContext` is invalid, expected a string.")
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! internal_error {
|
||||
($target:ty : $($other:path), *) => {
|
||||
|
||||
@@ -380,6 +380,9 @@ pub enum Action {
|
||||
#[serde(rename = "webhooks.*")]
|
||||
#[deserr(rename = "webhooks.*")]
|
||||
WebhooksAll,
|
||||
#[serde(rename = "indexes.compact")]
|
||||
#[deserr(rename = "indexes.compact")]
|
||||
IndexesCompact,
|
||||
}
|
||||
|
||||
impl Action {
|
||||
@@ -398,6 +401,7 @@ impl Action {
|
||||
INDEXES_UPDATE => Some(Self::IndexesUpdate),
|
||||
INDEXES_DELETE => Some(Self::IndexesDelete),
|
||||
INDEXES_SWAP => Some(Self::IndexesSwap),
|
||||
INDEXES_COMPACT => Some(Self::IndexesCompact),
|
||||
TASKS_ALL => Some(Self::TasksAll),
|
||||
TASKS_CANCEL => Some(Self::TasksCancel),
|
||||
TASKS_DELETE => Some(Self::TasksDelete),
|
||||
@@ -462,6 +466,7 @@ impl Action {
|
||||
IndexesUpdate => false,
|
||||
IndexesDelete => false,
|
||||
IndexesSwap => false,
|
||||
IndexesCompact => false,
|
||||
TasksCancel => false,
|
||||
TasksDelete => false,
|
||||
TasksGet => true,
|
||||
@@ -513,6 +518,7 @@ pub mod actions {
|
||||
pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr();
|
||||
pub const INDEXES_DELETE: u8 = IndexesDelete.repr();
|
||||
pub const INDEXES_SWAP: u8 = IndexesSwap.repr();
|
||||
pub const INDEXES_COMPACT: u8 = IndexesCompact.repr();
|
||||
pub const TASKS_ALL: u8 = TasksAll.repr();
|
||||
pub const TASKS_CANCEL: u8 = TasksCancel.repr();
|
||||
pub const TASKS_DELETE: u8 = TasksDelete.repr();
|
||||
@@ -614,6 +620,7 @@ pub(crate) mod test {
|
||||
assert!(WebhooksDelete.repr() == 47 && WEBHOOKS_DELETE == 47);
|
||||
assert!(WebhooksCreate.repr() == 48 && WEBHOOKS_CREATE == 48);
|
||||
assert!(WebhooksAll.repr() == 49 && WEBHOOKS_ALL == 49);
|
||||
assert!(IndexesCompact.repr() == 50 && INDEXES_COMPACT == 50);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#![allow(clippy::result_large_err)]
|
||||
|
||||
pub mod batch_view;
|
||||
pub mod batches;
|
||||
pub mod compression;
|
||||
|
||||
@@ -346,24 +346,26 @@ impl<T> Settings<T> {
|
||||
continue;
|
||||
};
|
||||
|
||||
Self::hide_secret(api_key);
|
||||
hide_secret(api_key, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn hide_secret(secret: &mut String) {
|
||||
match secret.len() {
|
||||
x if x < 10 => {
|
||||
secret.replace_range(.., "XXX...");
|
||||
}
|
||||
x if x < 20 => {
|
||||
secret.replace_range(2.., "XXXX...");
|
||||
}
|
||||
x if x < 30 => {
|
||||
secret.replace_range(3.., "XXXXX...");
|
||||
}
|
||||
_x => {
|
||||
secret.replace_range(5.., "XXXXXX...");
|
||||
}
|
||||
/// Redact a secret string, starting from the `secret_offset`th byte.
|
||||
pub fn hide_secret(secret: &mut String, secret_offset: usize) {
|
||||
match secret.len().checked_sub(secret_offset) {
|
||||
None => (),
|
||||
Some(x) if x < 10 => {
|
||||
secret.replace_range(secret_offset.., "XXX...");
|
||||
}
|
||||
Some(x) if x < 20 => {
|
||||
secret.replace_range((secret_offset + 2).., "XXXX...");
|
||||
}
|
||||
Some(x) if x < 30 => {
|
||||
secret.replace_range((secret_offset + 3).., "XXXXX...");
|
||||
}
|
||||
Some(_x) => {
|
||||
secret.replace_range((secret_offset + 5).., "XXXXXX...");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,6 +55,9 @@ pub struct TaskView {
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
impl TaskView {
|
||||
@@ -73,6 +76,7 @@ impl TaskView {
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
network: task.network.clone(),
|
||||
custom_metadata: task.custom_metadata.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -142,6 +146,11 @@ pub struct DetailsView {
|
||||
pub old_index_uid: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub new_index_uid: Option<String>,
|
||||
// index compaction
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub pre_compaction_size: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub post_compaction_size: Option<String>,
|
||||
}
|
||||
|
||||
impl DetailsView {
|
||||
@@ -314,6 +323,24 @@ impl DetailsView {
|
||||
// We should never be able to batch multiple renames at the same time.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
pre_compaction_size: match (
|
||||
self.pre_compaction_size.clone(),
|
||||
other.pre_compaction_size.clone(),
|
||||
) {
|
||||
(None, None) => None,
|
||||
(None, Some(size)) | (Some(size), None) => Some(size),
|
||||
// We should never be able to batch multiple compactions at the same time.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
post_compaction_size: match (
|
||||
self.post_compaction_size.clone(),
|
||||
other.post_compaction_size.clone(),
|
||||
) {
|
||||
(None, None) => None,
|
||||
(None, Some(size)) | (Some(size), None) => Some(size),
|
||||
// We should never be able to batch multiple compactions at the same time.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -415,6 +442,15 @@ impl From<Details> for DetailsView {
|
||||
upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
|
||||
..Default::default()
|
||||
},
|
||||
Details::IndexCompaction { pre_compaction_size, post_compaction_size, .. } => {
|
||||
DetailsView {
|
||||
pre_compaction_size: pre_compaction_size
|
||||
.map(|size| size.get_appropriate_unit(UnitType::Both).to_string()),
|
||||
post_compaction_size: post_compaction_size
|
||||
.map(|size| size.get_appropriate_unit(UnitType::Both).to_string()),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,6 +45,9 @@ pub struct Task {
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
impl Task {
|
||||
@@ -67,7 +70,8 @@ impl Task {
|
||||
| SettingsUpdate { index_uid, .. }
|
||||
| IndexCreation { index_uid, .. }
|
||||
| IndexUpdate { index_uid, .. }
|
||||
| IndexDeletion { index_uid } => Some(index_uid),
|
||||
| IndexDeletion { index_uid }
|
||||
| IndexCompaction { index_uid } => Some(index_uid),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,7 +98,8 @@ impl Task {
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::SnapshotCreation
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. } => None,
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::IndexCompaction { .. } => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -170,6 +175,9 @@ pub enum KindWithContent {
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
@@ -206,6 +214,7 @@ impl KindWithContent {
|
||||
KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
|
||||
KindWithContent::Export { .. } => Kind::Export,
|
||||
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
|
||||
KindWithContent::IndexCompaction { .. } => Kind::IndexCompaction,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -226,7 +235,8 @@ impl KindWithContent {
|
||||
| DocumentClear { index_uid }
|
||||
| SettingsUpdate { index_uid, .. }
|
||||
| IndexCreation { index_uid, .. }
|
||||
| IndexDeletion { index_uid } => vec![index_uid],
|
||||
| IndexDeletion { index_uid }
|
||||
| IndexCompaction { index_uid } => vec![index_uid],
|
||||
IndexUpdate { index_uid, new_index_uid, .. } => {
|
||||
let mut indexes = vec![index_uid.as_str()];
|
||||
if let Some(new_uid) = new_index_uid {
|
||||
@@ -325,6 +335,11 @@ impl KindWithContent {
|
||||
versioning::VERSION_PATCH,
|
||||
),
|
||||
}),
|
||||
KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
|
||||
index_uid: index_uid.clone(),
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -407,6 +422,11 @@ impl KindWithContent {
|
||||
versioning::VERSION_PATCH,
|
||||
),
|
||||
}),
|
||||
KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
|
||||
index_uid: index_uid.clone(),
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -469,6 +489,11 @@ impl From<&KindWithContent> for Option<Details> {
|
||||
versioning::VERSION_PATCH,
|
||||
),
|
||||
}),
|
||||
KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
|
||||
index_uid: index_uid.clone(),
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -579,6 +604,7 @@ pub enum Kind {
|
||||
SnapshotCreation,
|
||||
Export,
|
||||
UpgradeDatabase,
|
||||
IndexCompaction,
|
||||
}
|
||||
|
||||
impl Kind {
|
||||
@@ -590,7 +616,8 @@ impl Kind {
|
||||
| Kind::SettingsUpdate
|
||||
| Kind::IndexCreation
|
||||
| Kind::IndexDeletion
|
||||
| Kind::IndexUpdate => true,
|
||||
| Kind::IndexUpdate
|
||||
| Kind::IndexCompaction => true,
|
||||
Kind::IndexSwap
|
||||
| Kind::TaskCancelation
|
||||
| Kind::TaskDeletion
|
||||
@@ -618,6 +645,7 @@ impl Display for Kind {
|
||||
Kind::SnapshotCreation => write!(f, "snapshotCreation"),
|
||||
Kind::Export => write!(f, "export"),
|
||||
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
|
||||
Kind::IndexCompaction => write!(f, "indexCompaction"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -653,6 +681,8 @@ impl FromStr for Kind {
|
||||
Ok(Kind::Export)
|
||||
} else if kind.eq_ignore_ascii_case("upgradeDatabase") {
|
||||
Ok(Kind::UpgradeDatabase)
|
||||
} else if kind.eq_ignore_ascii_case("indexCompaction") {
|
||||
Ok(Kind::IndexCompaction)
|
||||
} else {
|
||||
Err(ParseTaskKindError(kind.to_owned()))
|
||||
}
|
||||
@@ -738,6 +768,11 @@ pub enum Details {
|
||||
from: (u32, u32, u32),
|
||||
to: (u32, u32, u32),
|
||||
},
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
pre_compaction_size: Option<Byte>,
|
||||
post_compaction_size: Option<Byte>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
@@ -800,6 +835,10 @@ impl Details {
|
||||
Self::ClearAll { deleted_documents } => *deleted_documents = Some(0),
|
||||
Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0),
|
||||
Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0),
|
||||
Self::IndexCompaction { pre_compaction_size, post_compaction_size, .. } => {
|
||||
*pre_compaction_size = None;
|
||||
*post_compaction_size = None;
|
||||
}
|
||||
Self::SettingsUpdate { .. }
|
||||
| Self::IndexInfo { .. }
|
||||
| Self::Dump { .. }
|
||||
|
||||
@@ -11,6 +11,24 @@ pub struct Webhook {
|
||||
pub headers: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
impl Webhook {
|
||||
pub fn redact_authorization_header(&mut self) {
|
||||
// headers are case insensitive, so to make the redaction robust we iterate over qualifying headers
|
||||
// rather than getting one canonical `Authorization` header.
|
||||
for value in self
|
||||
.headers
|
||||
.iter_mut()
|
||||
.filter_map(|(name, value)| name.eq_ignore_ascii_case("authorization").then_some(value))
|
||||
{
|
||||
if value.starts_with("Bearer ") {
|
||||
crate::settings::hide_secret(value, "Bearer ".len());
|
||||
} else {
|
||||
crate::settings::hide_secret(value, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Default, Clone, PartialEq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct WebhooksView {
|
||||
|
||||
@@ -91,7 +91,7 @@ time = { version = "0.3.41", features = [
|
||||
] }
|
||||
tokio = { version = "1.45.1", features = ["full"] }
|
||||
toml = "0.8.23"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.0", features = ["serde", "v4", "v7"] }
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.4.1"
|
||||
url = { version = "2.5.4", features = ["serde"] }
|
||||
|
||||
@@ -205,7 +205,10 @@ struct Infos {
|
||||
experimental_no_snapshot_compaction: bool,
|
||||
experimental_no_edition_2024_for_dumps: bool,
|
||||
experimental_no_edition_2024_for_settings: bool,
|
||||
experimental_no_edition_2024_for_prefix_post_processing: bool,
|
||||
experimental_no_edition_2024_for_facet_post_processing: bool,
|
||||
experimental_vector_store_setting: bool,
|
||||
experimental_personalization: bool,
|
||||
gpu_enabled: bool,
|
||||
db_path: bool,
|
||||
import_dump: bool,
|
||||
@@ -215,6 +218,7 @@ struct Infos {
|
||||
import_snapshot: bool,
|
||||
schedule_snapshot: Option<u64>,
|
||||
snapshot_dir: bool,
|
||||
uses_s3_snapshots: bool,
|
||||
ignore_missing_snapshot: bool,
|
||||
ignore_snapshot_if_db_exists: bool,
|
||||
http_addr: bool,
|
||||
@@ -283,6 +287,8 @@ impl Infos {
|
||||
indexer_options,
|
||||
config_file_path,
|
||||
no_analytics: _,
|
||||
experimental_personalization_api_key,
|
||||
s3_snapshot_options,
|
||||
} = options;
|
||||
|
||||
let schedule_snapshot = match schedule_snapshot {
|
||||
@@ -296,6 +302,8 @@ impl Infos {
|
||||
skip_index_budget: _,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_dumps,
|
||||
experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing,
|
||||
} = indexer_options;
|
||||
|
||||
let RuntimeTogglableFeatures {
|
||||
@@ -344,6 +352,7 @@ impl Infos {
|
||||
import_snapshot: import_snapshot.is_some(),
|
||||
schedule_snapshot,
|
||||
snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
|
||||
uses_s3_snapshots: s3_snapshot_options.is_some(),
|
||||
ignore_missing_snapshot,
|
||||
ignore_snapshot_if_db_exists,
|
||||
http_addr: http_addr != default_http_addr(),
|
||||
@@ -365,6 +374,9 @@ impl Infos {
|
||||
ssl_resumption,
|
||||
ssl_tickets,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing,
|
||||
experimental_personalization: experimental_personalization_api_key.is_some(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ use tokio::task::JoinError;
|
||||
use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum MeilisearchHttpError {
|
||||
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
|
||||
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
|
||||
@@ -37,6 +38,8 @@ pub enum MeilisearchHttpError {
|
||||
PaginationInFederatedQuery(usize, &'static str),
|
||||
#[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")]
|
||||
FacetsInFederatedQuery(usize, String, Vec<String>),
|
||||
#[error("Inside `.queries[{0}]`: Using `.personalize` is not allowed in federated queries.\n - Hint: remove `personalize` from query #{0} or remove `federation` from the request")]
|
||||
PersonalizationInFederatedQuery(usize),
|
||||
#[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")]
|
||||
InconsistentFacetOrder {
|
||||
facet: String,
|
||||
@@ -136,6 +139,9 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::InconsistentFacetOrder { .. } => {
|
||||
Code::InvalidMultiSearchFacetOrder
|
||||
}
|
||||
MeilisearchHttpError::PersonalizationInFederatedQuery(_) => {
|
||||
Code::InvalidMultiSearchQueryPersonalization
|
||||
}
|
||||
MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
|
||||
Code::InconsistentDocumentChangeHeaders
|
||||
}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
#![allow(clippy::result_large_err)]
|
||||
#![allow(rustdoc::private_intra_doc_links)]
|
||||
|
||||
#[macro_use]
|
||||
pub mod error;
|
||||
pub mod analytics;
|
||||
@@ -9,6 +11,7 @@ pub mod middleware;
|
||||
pub mod option;
|
||||
#[cfg(test)]
|
||||
mod option_test;
|
||||
pub mod personalization;
|
||||
pub mod routes;
|
||||
pub mod search;
|
||||
pub mod search_queue;
|
||||
@@ -56,6 +59,7 @@ use tracing::{error, info_span};
|
||||
use tracing_subscriber::filter::Targets;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::personalization::PersonalizationService;
|
||||
|
||||
/// Default number of simultaneously opened indexes.
|
||||
///
|
||||
@@ -126,12 +130,8 @@ pub type LogStderrType = tracing_subscriber::filter::Filtered<
|
||||
>;
|
||||
|
||||
pub fn create_app(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: Data<AuthController>,
|
||||
search_queue: Data<SearchQueue>,
|
||||
services: ServicesData,
|
||||
opt: Opt,
|
||||
logs: (LogRouteHandle, LogStderrHandle),
|
||||
analytics: Data<Analytics>,
|
||||
enable_dashboard: bool,
|
||||
) -> actix_web::App<
|
||||
impl ServiceFactory<
|
||||
@@ -143,17 +143,7 @@ pub fn create_app(
|
||||
>,
|
||||
> {
|
||||
let app = actix_web::App::new()
|
||||
.configure(|s| {
|
||||
configure_data(
|
||||
s,
|
||||
index_scheduler.clone(),
|
||||
auth_controller.clone(),
|
||||
search_queue.clone(),
|
||||
&opt,
|
||||
logs,
|
||||
analytics.clone(),
|
||||
)
|
||||
})
|
||||
.configure(|s| configure_data(s, services, &opt))
|
||||
.configure(routes::configure)
|
||||
.configure(|s| dashboard(s, enable_dashboard));
|
||||
|
||||
@@ -214,7 +204,10 @@ enum OnFailure {
|
||||
KeepDb,
|
||||
}
|
||||
|
||||
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
pub fn setup_meilisearch(
|
||||
opt: &Opt,
|
||||
handle: tokio::runtime::Handle,
|
||||
) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
let index_scheduler_opt = IndexSchedulerOptions {
|
||||
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
|
||||
auth_path: opt.db_path.join("auth"),
|
||||
@@ -228,7 +221,11 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
task_db_size: opt.max_task_db_size.as_u64() as usize,
|
||||
index_base_map_size: opt.max_index_size.as_u64() as usize,
|
||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||
indexer_config: Arc::new((&opt.indexer_options).try_into()?),
|
||||
indexer_config: Arc::new({
|
||||
let s3_snapshot_options =
|
||||
opt.s3_snapshot_options.clone().map(|opt| opt.try_into()).transpose()?;
|
||||
IndexerConfig { s3_snapshot_options, ..(&opt.indexer_options).try_into()? }
|
||||
}),
|
||||
autobatching_enabled: true,
|
||||
cleanup_enabled: !opt.experimental_replication_parameters,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
@@ -254,6 +251,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
index_scheduler_opt,
|
||||
OnFailure::RemoveDb,
|
||||
binary_version, // the db is empty
|
||||
handle,
|
||||
)?,
|
||||
Err(e) => {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
@@ -271,7 +269,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
bail!("snapshot doesn't exist at {}", snapshot_path.display())
|
||||
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
|
||||
} else {
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
|
||||
}
|
||||
} else if let Some(ref path) = opt.import_dump {
|
||||
let src_path_exists = path.exists();
|
||||
@@ -282,6 +280,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
index_scheduler_opt,
|
||||
OnFailure::RemoveDb,
|
||||
binary_version, // the db is empty
|
||||
handle,
|
||||
)?;
|
||||
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
|
||||
Ok(()) => (index_scheduler, auth_controller),
|
||||
@@ -302,10 +301,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
|
||||
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
|
||||
} else {
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
|
||||
}
|
||||
} else {
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
|
||||
};
|
||||
|
||||
// We create a loop in a thread that registers snapshotCreation tasks
|
||||
@@ -336,6 +335,7 @@ fn open_or_create_database_unchecked(
|
||||
index_scheduler_opt: IndexSchedulerOptions,
|
||||
on_failure: OnFailure,
|
||||
version: (u32, u32, u32),
|
||||
handle: tokio::runtime::Handle,
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
// we don't want to create anything in the data.ms yet, thus we
|
||||
// wrap our two builders in a closure that'll be executed later.
|
||||
@@ -343,7 +343,7 @@ fn open_or_create_database_unchecked(
|
||||
let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap();
|
||||
let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key);
|
||||
let index_scheduler_builder = || -> anyhow::Result<_> {
|
||||
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
|
||||
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version, Some(handle))?)
|
||||
};
|
||||
|
||||
match (
|
||||
@@ -450,6 +450,7 @@ fn open_or_create_database(
|
||||
index_scheduler_opt: IndexSchedulerOptions,
|
||||
empty_db: bool,
|
||||
binary_version: (u32, u32, u32),
|
||||
handle: tokio::runtime::Handle,
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
let version = if !empty_db {
|
||||
check_version(opt, &index_scheduler_opt, binary_version)?
|
||||
@@ -457,7 +458,7 @@ fn open_or_create_database(
|
||||
binary_version
|
||||
};
|
||||
|
||||
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version)
|
||||
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version, handle)
|
||||
}
|
||||
|
||||
fn import_dump(
|
||||
@@ -525,7 +526,11 @@ fn import_dump(
|
||||
let indexer_config = if base_config.max_threads.is_none() {
|
||||
let (thread_pool, _) = default_thread_pool_and_threads();
|
||||
|
||||
let _config = IndexerConfig { thread_pool, ..*base_config };
|
||||
let _config = IndexerConfig {
|
||||
thread_pool,
|
||||
s3_snapshot_options: base_config.s3_snapshot_options.clone(),
|
||||
..*base_config
|
||||
};
|
||||
backup_config = _config;
|
||||
&backup_config
|
||||
} else {
|
||||
@@ -673,23 +678,26 @@ fn import_dump(
|
||||
Ok(index_scheduler_dump.finish()?)
|
||||
}
|
||||
|
||||
pub fn configure_data(
|
||||
config: &mut web::ServiceConfig,
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth: Data<AuthController>,
|
||||
search_queue: Data<SearchQueue>,
|
||||
opt: &Opt,
|
||||
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
|
||||
analytics: Data<Analytics>,
|
||||
) {
|
||||
pub fn configure_data(config: &mut web::ServiceConfig, services: ServicesData, opt: &Opt) {
|
||||
let ServicesData {
|
||||
index_scheduler,
|
||||
auth,
|
||||
search_queue,
|
||||
personalization_service,
|
||||
logs_route_handle,
|
||||
logs_stderr_handle,
|
||||
analytics,
|
||||
} = services;
|
||||
|
||||
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
|
||||
config
|
||||
.app_data(index_scheduler)
|
||||
.app_data(auth)
|
||||
.app_data(search_queue)
|
||||
.app_data(analytics)
|
||||
.app_data(web::Data::new(logs_route))
|
||||
.app_data(web::Data::new(logs_stderr))
|
||||
.app_data(personalization_service)
|
||||
.app_data(logs_route_handle)
|
||||
.app_data(logs_stderr_handle)
|
||||
.app_data(web::Data::new(opt.clone()))
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
@@ -750,3 +758,14 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
|
||||
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
|
||||
config.service(web::resource("/").route(web::get().to(routes::running)));
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ServicesData {
|
||||
pub index_scheduler: Data<IndexScheduler>,
|
||||
pub auth: Data<AuthController>,
|
||||
pub search_queue: Data<SearchQueue>,
|
||||
pub personalization_service: Data<PersonalizationService>,
|
||||
pub logs_route_handle: Data<LogRouteHandle>,
|
||||
pub logs_stderr_handle: Data<LogStderrHandle>,
|
||||
pub analytics: Data<Analytics>,
|
||||
}
|
||||
|
||||
@@ -14,10 +14,11 @@ use index_scheduler::IndexScheduler;
|
||||
use is_terminal::IsTerminal;
|
||||
use meilisearch::analytics::Analytics;
|
||||
use meilisearch::option::LogMode;
|
||||
use meilisearch::personalization::PersonalizationService;
|
||||
use meilisearch::search_queue::SearchQueue;
|
||||
use meilisearch::{
|
||||
analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
|
||||
LogStderrType, Opt, SubscriberForSecondLayer,
|
||||
LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
|
||||
};
|
||||
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
@@ -76,7 +77,10 @@ fn on_panic(info: &std::panic::PanicHookInfo) {
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
try_main().await.inspect_err(|error| {
|
||||
// won't panic inside of tokio::main
|
||||
let runtime = tokio::runtime::Handle::current();
|
||||
|
||||
try_main(runtime).await.inspect_err(|error| {
|
||||
tracing::error!(%error);
|
||||
let mut current = error.source();
|
||||
let mut depth = 0;
|
||||
@@ -88,7 +92,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
})
|
||||
}
|
||||
|
||||
async fn try_main() -> anyhow::Result<()> {
|
||||
async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
|
||||
let (opt, config_read_from) = Opt::try_build()?;
|
||||
|
||||
std::panic::set_hook(Box::new(on_panic));
|
||||
@@ -122,7 +126,7 @@ async fn try_main() -> anyhow::Result<()> {
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
|
||||
let (index_scheduler, auth_controller) = setup_meilisearch(&opt, runtime)?;
|
||||
|
||||
let analytics =
|
||||
analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await;
|
||||
@@ -149,8 +153,15 @@ async fn run_http(
|
||||
let enable_dashboard = &opt.env == "development";
|
||||
let opt_clone = opt.clone();
|
||||
let index_scheduler = Data::from(index_scheduler);
|
||||
let auth_controller = Data::from(auth_controller);
|
||||
let auth = Data::from(auth_controller);
|
||||
let analytics = Data::from(analytics);
|
||||
// Create personalization service with API key from options
|
||||
let personalization_service = Data::new(
|
||||
opt.experimental_personalization_api_key
|
||||
.clone()
|
||||
.map(PersonalizationService::cohere)
|
||||
.unwrap_or_else(PersonalizationService::disabled),
|
||||
);
|
||||
let search_queue = SearchQueue::new(
|
||||
opt.experimental_search_queue_size,
|
||||
available_parallelism()
|
||||
@@ -162,21 +173,25 @@ async fn run_http(
|
||||
usize::from(opt.experimental_drop_search_after) as u64
|
||||
));
|
||||
let search_queue = Data::new(search_queue);
|
||||
let (logs_route_handle, logs_stderr_handle) = logs;
|
||||
let logs_route_handle = Data::new(logs_route_handle);
|
||||
let logs_stderr_handle = Data::new(logs_stderr_handle);
|
||||
|
||||
let http_server = HttpServer::new(move || {
|
||||
create_app(
|
||||
index_scheduler.clone(),
|
||||
auth_controller.clone(),
|
||||
search_queue.clone(),
|
||||
opt.clone(),
|
||||
logs.clone(),
|
||||
analytics.clone(),
|
||||
enable_dashboard,
|
||||
)
|
||||
})
|
||||
// Disable signals allows the server to terminate immediately when a user enter CTRL-C
|
||||
.disable_signals()
|
||||
.keep_alive(KeepAlive::Os);
|
||||
let services = ServicesData {
|
||||
index_scheduler,
|
||||
auth,
|
||||
search_queue,
|
||||
personalization_service,
|
||||
logs_route_handle,
|
||||
logs_stderr_handle,
|
||||
analytics,
|
||||
};
|
||||
|
||||
let http_server =
|
||||
HttpServer::new(move || create_app(services.clone(), opt.clone(), enable_dashboard))
|
||||
// Disable signals allows the server to terminate immediately when a user enter CTRL-C
|
||||
.disable_signals()
|
||||
.keep_alive(KeepAlive::Os);
|
||||
|
||||
if let Some(config) = opt_clone.get_ssl_config()? {
|
||||
http_server.bind_rustls_0_23(opt_clone.http_addr, config)?.run().await?;
|
||||
|
||||
@@ -114,4 +114,9 @@ lazy_static! {
|
||||
"Meilisearch Task Queue Size Until Stop Registering",
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
|
||||
"meilisearch_personalized_search_requests",
|
||||
"Meilisearch number of search requests with personalization"
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
}
|
||||
|
||||
@@ -7,12 +7,13 @@ use std::ops::Deref;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::{env, fmt, fs};
|
||||
|
||||
use byte_unit::{Byte, ParseError, UnitType};
|
||||
use clap::Parser;
|
||||
use meilisearch_types::features::InstanceTogglableFeatures;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::update::{IndexerConfig, S3SnapshotOptions};
|
||||
use meilisearch_types::milli::ThreadPoolNoAbortBuilder;
|
||||
use rustls::server::{ServerSessionMemoryCache, WebPkiClientVerifier};
|
||||
use rustls::RootCertStore;
|
||||
@@ -55,6 +56,10 @@ const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LO
|
||||
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
|
||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS: &str =
|
||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS";
|
||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
|
||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
|
||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
|
||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
|
||||
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
|
||||
@@ -70,6 +75,22 @@ const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
|
||||
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
|
||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
|
||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
|
||||
const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
|
||||
"MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";
|
||||
|
||||
// Related to S3 snapshots
|
||||
const MEILI_S3_BUCKET_URL: &str = "MEILI_S3_BUCKET_URL";
|
||||
const MEILI_S3_BUCKET_REGION: &str = "MEILI_S3_BUCKET_REGION";
|
||||
const MEILI_S3_BUCKET_NAME: &str = "MEILI_S3_BUCKET_NAME";
|
||||
const MEILI_S3_SNAPSHOT_PREFIX: &str = "MEILI_S3_SNAPSHOT_PREFIX";
|
||||
const MEILI_S3_ACCESS_KEY: &str = "MEILI_S3_ACCESS_KEY";
|
||||
const MEILI_S3_SECRET_KEY: &str = "MEILI_S3_SECRET_KEY";
|
||||
const MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS: &str = "MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS";
|
||||
const MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL: &str = "MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL";
|
||||
const MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS: &str =
|
||||
"MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS";
|
||||
const MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE: &str = "MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE";
|
||||
|
||||
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
|
||||
@@ -79,6 +100,10 @@ const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
|
||||
const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
|
||||
const DEFAULT_SNAPSHOT_INTERVAL_SEC_STR: &str = "86400";
|
||||
const DEFAULT_DUMP_DIR: &str = "dumps/";
|
||||
const DEFAULT_S3_SNAPSHOT_MAX_IN_FLIGHT_PARTS: NonZeroUsize = NonZeroUsize::new(10).unwrap();
|
||||
const DEFAULT_S3_SNAPSHOT_COMPRESSION_LEVEL: u32 = 0;
|
||||
const DEFAULT_S3_SNAPSHOT_SIGNATURE_DURATION_SECONDS: u64 = 8 * 3600; // 8 hours
|
||||
const DEFAULT_S3_SNAPSHOT_MULTIPART_PART_SIZE: Byte = Byte::from_u64(375 * 1024 * 1024); // 375 MiB
|
||||
|
||||
const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
|
||||
const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
|
||||
@@ -471,10 +496,20 @@ pub struct Opt {
|
||||
#[serde(default)]
|
||||
pub experimental_no_snapshot_compaction: bool,
|
||||
|
||||
/// Experimental personalization API key feature.
|
||||
///
|
||||
/// Sets the API key for personalization features.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
|
||||
pub experimental_personalization_api_key: Option<String>,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub indexer_options: IndexerOpts,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub s3_snapshot_options: Option<S3SnapshotOpts>,
|
||||
|
||||
/// Set the path to a configuration file that should be used to setup the engine.
|
||||
/// Format must be TOML.
|
||||
#[clap(long)]
|
||||
@@ -576,6 +611,8 @@ impl Opt {
|
||||
experimental_limit_batched_tasks_total_size,
|
||||
experimental_embedding_cache_entries,
|
||||
experimental_no_snapshot_compaction,
|
||||
experimental_personalization_api_key,
|
||||
s3_snapshot_options,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
|
||||
@@ -676,7 +713,22 @@ impl Opt {
|
||||
MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
|
||||
experimental_no_snapshot_compaction.to_string(),
|
||||
);
|
||||
if let Some(experimental_personalization_api_key) = experimental_personalization_api_key {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY,
|
||||
experimental_personalization_api_key,
|
||||
);
|
||||
}
|
||||
indexer_options.export_to_env();
|
||||
if let Some(s3_snapshot_options) = s3_snapshot_options {
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
let _ = s3_snapshot_options;
|
||||
panic!("S3 snapshot options are not supported on Windows");
|
||||
}
|
||||
#[cfg(unix)]
|
||||
s3_snapshot_options.export_to_env();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
|
||||
@@ -772,6 +824,22 @@ pub struct IndexerOpts {
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS)]
|
||||
#[serde(default)]
|
||||
pub experimental_no_edition_2024_for_dumps: bool,
|
||||
|
||||
/// Experimental no edition 2024 to compute prefixes. For more information,
|
||||
/// see: <https://github.com/orgs/meilisearch/discussions/862>
|
||||
///
|
||||
/// Enables the experimental no edition 2024 to compute prefixes.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING)]
|
||||
#[serde(default)]
|
||||
pub experimental_no_edition_2024_for_prefix_post_processing: bool,
|
||||
|
||||
/// Experimental no edition 2024 to compute facets. For more information,
|
||||
/// see: <https://github.com/orgs/meilisearch/discussions/862>
|
||||
///
|
||||
/// Enables the experimental no edition 2024 to compute facets.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
|
||||
#[serde(default)]
|
||||
pub experimental_no_edition_2024_for_facet_post_processing: bool,
|
||||
}
|
||||
|
||||
impl IndexerOpts {
|
||||
@@ -783,6 +851,8 @@ impl IndexerOpts {
|
||||
skip_index_budget: _,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_dumps,
|
||||
experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing,
|
||||
} = self;
|
||||
if let Some(max_indexing_memory) = max_indexing_memory.0 {
|
||||
export_to_env_if_not_present(
|
||||
@@ -808,6 +878,18 @@ impl IndexerOpts {
|
||||
experimental_no_edition_2024_for_dumps.to_string(),
|
||||
);
|
||||
}
|
||||
if experimental_no_edition_2024_for_prefix_post_processing {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING,
|
||||
experimental_no_edition_2024_for_prefix_post_processing.to_string(),
|
||||
);
|
||||
}
|
||||
if experimental_no_edition_2024_for_facet_post_processing {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING,
|
||||
experimental_no_edition_2024_for_facet_post_processing.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -815,6 +897,16 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
|
||||
let IndexerOpts {
|
||||
max_indexing_memory,
|
||||
max_indexing_threads,
|
||||
skip_index_budget,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_dumps,
|
||||
experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing,
|
||||
} = other;
|
||||
|
||||
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
|
||||
.num_threads(other.max_indexing_threads.unwrap_or_else(|| num_cpus::get() / 2))
|
||||
.build()?;
|
||||
@@ -822,17 +914,163 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
||||
Ok(Self {
|
||||
thread_pool,
|
||||
log_every_n: Some(DEFAULT_LOG_EVERY_N),
|
||||
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
|
||||
max_threads: *other.max_indexing_threads,
|
||||
max_memory: max_indexing_memory.map(|b| b.as_u64() as usize),
|
||||
max_threads: max_indexing_threads.0,
|
||||
max_positions_per_attributes: None,
|
||||
skip_index_budget: other.skip_index_budget,
|
||||
experimental_no_edition_2024_for_settings: other
|
||||
.experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_dumps: other.experimental_no_edition_2024_for_dumps,
|
||||
skip_index_budget: *skip_index_budget,
|
||||
experimental_no_edition_2024_for_settings: *experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_dumps: *experimental_no_edition_2024_for_dumps,
|
||||
chunk_compression_type: Default::default(),
|
||||
chunk_compression_level: Default::default(),
|
||||
documents_chunk_size: Default::default(),
|
||||
max_nb_chunks: Default::default(),
|
||||
experimental_no_edition_2024_for_prefix_post_processing:
|
||||
*experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing:
|
||||
*experimental_no_edition_2024_for_facet_post_processing,
|
||||
s3_snapshot_options: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Parser, Deserialize)]
|
||||
// This group is a bit tricky but makes it possible to require all listed fields if one of them
|
||||
// is specified. It lets us keep an Option for the S3SnapshotOpts configuration.
|
||||
// <https://github.com/clap-rs/clap/issues/5092#issuecomment-2616986075>
|
||||
#[group(requires_all = ["s3_bucket_url", "s3_bucket_region", "s3_bucket_name", "s3_snapshot_prefix", "s3_access_key", "s3_secret_key"])]
|
||||
pub struct S3SnapshotOpts {
|
||||
/// The S3 bucket URL in the format https://s3.<region>.amazonaws.com.
|
||||
#[clap(long, env = MEILI_S3_BUCKET_URL, required = false)]
|
||||
#[serde(default)]
|
||||
pub s3_bucket_url: String,
|
||||
|
||||
/// The region in the format us-east-1.
|
||||
#[clap(long, env = MEILI_S3_BUCKET_REGION, required = false)]
|
||||
#[serde(default)]
|
||||
pub s3_bucket_region: String,
|
||||
|
||||
/// The bucket name.
|
||||
#[clap(long, env = MEILI_S3_BUCKET_NAME, required = false)]
|
||||
#[serde(default)]
|
||||
pub s3_bucket_name: String,
|
||||
|
||||
/// The prefix path where to put the snapshot, uses normal slashes (/).
|
||||
#[clap(long, env = MEILI_S3_SNAPSHOT_PREFIX, required = false)]
|
||||
#[serde(default)]
|
||||
pub s3_snapshot_prefix: String,
|
||||
|
||||
/// The S3 access key.
|
||||
#[clap(long, env = MEILI_S3_ACCESS_KEY, required = false)]
|
||||
#[serde(default)]
|
||||
pub s3_access_key: String,
|
||||
|
||||
/// The S3 secret key.
|
||||
#[clap(long, env = MEILI_S3_SECRET_KEY, required = false)]
|
||||
#[serde(default)]
|
||||
pub s3_secret_key: String,
|
||||
|
||||
/// The maximum number of parts that can be uploaded in parallel.
|
||||
///
|
||||
/// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS, default_value_t = default_experimental_s3_snapshot_max_in_flight_parts())]
|
||||
#[serde(default = "default_experimental_s3_snapshot_max_in_flight_parts")]
|
||||
pub experimental_s3_max_in_flight_parts: NonZeroUsize,
|
||||
|
||||
/// The compression level. Defaults to no compression (0).
|
||||
///
|
||||
/// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL, default_value_t = default_experimental_s3_snapshot_compression_level())]
|
||||
#[serde(default = "default_experimental_s3_snapshot_compression_level")]
|
||||
pub experimental_s3_compression_level: u32,
|
||||
|
||||
/// The signature duration for the multipart upload.
|
||||
///
|
||||
/// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS, default_value_t = default_experimental_s3_snapshot_signature_duration_seconds())]
|
||||
#[serde(default = "default_experimental_s3_snapshot_signature_duration_seconds")]
|
||||
pub experimental_s3_signature_duration_seconds: u64,
|
||||
|
||||
/// The size of the the multipart parts.
|
||||
///
|
||||
/// Must not be less than 10MiB and larger than 8GiB. Yes,
|
||||
/// twice the boundaries of the AWS S3 multipart upload
|
||||
/// because we use it a bit differently internally.
|
||||
///
|
||||
/// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE, default_value_t = default_experimental_s3_snapshot_multipart_part_size())]
|
||||
#[serde(default = "default_experimental_s3_snapshot_multipart_part_size")]
|
||||
pub experimental_s3_multipart_part_size: Byte,
|
||||
}
|
||||
|
||||
impl S3SnapshotOpts {
|
||||
/// Exports the values to their corresponding env vars if they are not set.
|
||||
pub fn export_to_env(self) {
|
||||
let S3SnapshotOpts {
|
||||
s3_bucket_url,
|
||||
s3_bucket_region,
|
||||
s3_bucket_name,
|
||||
s3_snapshot_prefix,
|
||||
s3_access_key,
|
||||
s3_secret_key,
|
||||
experimental_s3_max_in_flight_parts,
|
||||
experimental_s3_compression_level,
|
||||
experimental_s3_signature_duration_seconds,
|
||||
experimental_s3_multipart_part_size,
|
||||
} = self;
|
||||
|
||||
export_to_env_if_not_present(MEILI_S3_BUCKET_URL, s3_bucket_url);
|
||||
export_to_env_if_not_present(MEILI_S3_BUCKET_REGION, s3_bucket_region);
|
||||
export_to_env_if_not_present(MEILI_S3_BUCKET_NAME, s3_bucket_name);
|
||||
export_to_env_if_not_present(MEILI_S3_SNAPSHOT_PREFIX, s3_snapshot_prefix);
|
||||
export_to_env_if_not_present(MEILI_S3_ACCESS_KEY, s3_access_key);
|
||||
export_to_env_if_not_present(MEILI_S3_SECRET_KEY, s3_secret_key);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS,
|
||||
experimental_s3_max_in_flight_parts.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL,
|
||||
experimental_s3_compression_level.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS,
|
||||
experimental_s3_signature_duration_seconds.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE,
|
||||
experimental_s3_multipart_part_size.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<S3SnapshotOpts> for S3SnapshotOptions {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(other: S3SnapshotOpts) -> Result<Self, Self::Error> {
|
||||
let S3SnapshotOpts {
|
||||
s3_bucket_url,
|
||||
s3_bucket_region,
|
||||
s3_bucket_name,
|
||||
s3_snapshot_prefix,
|
||||
s3_access_key,
|
||||
s3_secret_key,
|
||||
experimental_s3_max_in_flight_parts,
|
||||
experimental_s3_compression_level,
|
||||
experimental_s3_signature_duration_seconds,
|
||||
experimental_s3_multipart_part_size,
|
||||
} = other;
|
||||
|
||||
Ok(S3SnapshotOptions {
|
||||
s3_bucket_url,
|
||||
s3_bucket_region,
|
||||
s3_bucket_name,
|
||||
s3_snapshot_prefix,
|
||||
s3_access_key,
|
||||
s3_secret_key,
|
||||
s3_max_in_flight_parts: experimental_s3_max_in_flight_parts,
|
||||
s3_compression_level: experimental_s3_compression_level,
|
||||
s3_signature_duration: Duration::from_secs(experimental_s3_signature_duration_seconds),
|
||||
s3_multipart_part_size: experimental_s3_multipart_part_size.as_u64(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1051,6 +1289,22 @@ fn default_snapshot_interval_sec() -> &'static str {
|
||||
DEFAULT_SNAPSHOT_INTERVAL_SEC_STR
|
||||
}
|
||||
|
||||
fn default_experimental_s3_snapshot_max_in_flight_parts() -> NonZeroUsize {
|
||||
DEFAULT_S3_SNAPSHOT_MAX_IN_FLIGHT_PARTS
|
||||
}
|
||||
|
||||
fn default_experimental_s3_snapshot_compression_level() -> u32 {
|
||||
DEFAULT_S3_SNAPSHOT_COMPRESSION_LEVEL
|
||||
}
|
||||
|
||||
fn default_experimental_s3_snapshot_signature_duration_seconds() -> u64 {
|
||||
DEFAULT_S3_SNAPSHOT_SIGNATURE_DURATION_SECONDS
|
||||
}
|
||||
|
||||
fn default_experimental_s3_snapshot_multipart_part_size() -> Byte {
|
||||
DEFAULT_S3_SNAPSHOT_MULTIPART_PART_SIZE
|
||||
}
|
||||
|
||||
fn default_dump_dir() -> PathBuf {
|
||||
PathBuf::from(DEFAULT_DUMP_DIR)
|
||||
}
|
||||
|
||||
366
crates/meilisearch/src/personalization/mod.rs
Normal file
366
crates/meilisearch/src/personalization/mod.rs
Normal file
@@ -0,0 +1,366 @@
|
||||
use crate::search::{Personalize, SearchResult};
|
||||
use meilisearch_types::{
|
||||
error::{Code, ErrorCode, ResponseError},
|
||||
milli::TimeBudget,
|
||||
};
|
||||
use rand::Rng;
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
const COHERE_API_URL: &str = "https://api.cohere.ai/v1/rerank";
|
||||
const MAX_RETRIES: u32 = 10;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
enum PersonalizationError {
|
||||
#[error("Personalization service: HTTP request failed: {0}")]
|
||||
Request(#[from] reqwest::Error),
|
||||
#[error("Personalization service: Failed to parse response: {0}")]
|
||||
Parse(String),
|
||||
#[error("Personalization service: Cohere API error: {0}")]
|
||||
Api(String),
|
||||
#[error("Personalization service: Unauthorized: invalid API key")]
|
||||
Unauthorized,
|
||||
#[error("Personalization service: Rate limited: too many requests")]
|
||||
RateLimited,
|
||||
#[error("Personalization service: Bad request: {0}")]
|
||||
BadRequest(String),
|
||||
#[error("Personalization service: Internal server error: {0}")]
|
||||
InternalServerError(String),
|
||||
#[error("Personalization service: Network error: {0}")]
|
||||
Network(String),
|
||||
#[error("Personalization service: Deadline exceeded")]
|
||||
DeadlineExceeded,
|
||||
#[error(transparent)]
|
||||
FeatureNotEnabled(#[from] index_scheduler::error::FeatureNotEnabledError),
|
||||
}
|
||||
|
||||
impl ErrorCode for PersonalizationError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
PersonalizationError::FeatureNotEnabled { .. } => Code::FeatureNotEnabled,
|
||||
PersonalizationError::Unauthorized => Code::RemoteInvalidApiKey,
|
||||
PersonalizationError::RateLimited => Code::TooManySearchRequests,
|
||||
PersonalizationError::BadRequest(_) => Code::RemoteBadRequest,
|
||||
PersonalizationError::InternalServerError(_) => Code::RemoteRemoteError,
|
||||
PersonalizationError::Network(_) | PersonalizationError::Request(_) => {
|
||||
Code::RemoteCouldNotSendRequest
|
||||
}
|
||||
PersonalizationError::Parse(_) | PersonalizationError::Api(_) => {
|
||||
Code::RemoteBadResponse
|
||||
}
|
||||
PersonalizationError::DeadlineExceeded => Code::Internal, // should not be returned to the client
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CohereService {
|
||||
client: Client,
|
||||
api_key: String,
|
||||
}
|
||||
|
||||
impl CohereService {
|
||||
pub fn new(api_key: String) -> Self {
|
||||
info!("Personalization service initialized with Cohere API");
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.build()
|
||||
.expect("Failed to create HTTP client");
|
||||
Self { client, api_key }
|
||||
}
|
||||
|
||||
pub async fn rerank_search_results(
|
||||
&self,
|
||||
search_result: SearchResult,
|
||||
personalize: &Personalize,
|
||||
query: Option<&str>,
|
||||
time_budget: TimeBudget,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
if time_budget.exceeded() {
|
||||
warn!("Could not rerank due to deadline");
|
||||
// If the deadline is exceeded, return the original search result instead of an error
|
||||
return Ok(search_result);
|
||||
}
|
||||
|
||||
// Extract user context from personalization
|
||||
let user_context = personalize.user_context.as_str();
|
||||
|
||||
// Build the prompt by merging query and user context
|
||||
let prompt = match query {
|
||||
Some(q) => format!("User Context: {user_context}\nQuery: {q}"),
|
||||
None => format!("User Context: {user_context}"),
|
||||
};
|
||||
|
||||
// Extract documents for reranking
|
||||
let documents: Vec<String> = search_result
|
||||
.hits
|
||||
.iter()
|
||||
.map(|hit| {
|
||||
// Convert the document to a string representation for reranking
|
||||
serde_json::to_string(&hit.document).unwrap_or_else(|_| "{}".to_string())
|
||||
})
|
||||
.collect();
|
||||
|
||||
if documents.is_empty() {
|
||||
return Ok(search_result);
|
||||
}
|
||||
|
||||
// Call Cohere's rerank API with retry logic
|
||||
let reranked_indices =
|
||||
match self.call_rerank_with_retry(&prompt, &documents, time_budget).await {
|
||||
Ok(indices) => indices,
|
||||
Err(PersonalizationError::DeadlineExceeded) => {
|
||||
// If the deadline is exceeded, return the original search result instead of an error
|
||||
return Ok(search_result);
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
debug!("Cohere rerank successful, reordering {} results", search_result.hits.len());
|
||||
|
||||
// Reorder the hits based on Cohere's reranking
|
||||
let mut reranked_hits = Vec::new();
|
||||
for index in reranked_indices.iter() {
|
||||
if let Some(hit) = search_result.hits.get(*index) {
|
||||
reranked_hits.push(hit.clone());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(SearchResult { hits: reranked_hits, ..search_result })
|
||||
}
|
||||
|
||||
async fn call_rerank_with_retry(
|
||||
&self,
|
||||
query: &str,
|
||||
documents: &[String],
|
||||
time_budget: TimeBudget,
|
||||
) -> Result<Vec<usize>, PersonalizationError> {
|
||||
let request_body = CohereRerankRequest {
|
||||
query: query.to_string(),
|
||||
documents: documents.to_vec(),
|
||||
model: "rerank-english-v3.0".to_string(),
|
||||
};
|
||||
|
||||
// Retry loop similar to vector extraction
|
||||
for attempt in 0..MAX_RETRIES {
|
||||
let response_result = self.send_rerank_request(&request_body).await;
|
||||
|
||||
let retry_duration = match self.handle_response(response_result).await {
|
||||
Ok(indices) => return Ok(indices),
|
||||
Err(retry) => {
|
||||
warn!("Cohere rerank attempt #{} failed: {}", attempt, retry.error);
|
||||
|
||||
if time_budget.exceeded() {
|
||||
warn!("Could not rerank due to deadline");
|
||||
return Err(PersonalizationError::DeadlineExceeded);
|
||||
} else {
|
||||
match retry.into_duration(attempt) {
|
||||
Ok(d) => d,
|
||||
Err(error) => return Err(error),
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// randomly up to double the retry duration
|
||||
let retry_duration = retry_duration
|
||||
+ rand::thread_rng().gen_range(std::time::Duration::ZERO..retry_duration);
|
||||
|
||||
warn!("Retrying after {}ms", retry_duration.as_millis());
|
||||
tokio::time::sleep(retry_duration).await;
|
||||
}
|
||||
|
||||
// Final attempt without retry
|
||||
let response_result = self.send_rerank_request(&request_body).await;
|
||||
|
||||
match self.handle_response(response_result).await {
|
||||
Ok(indices) => Ok(indices),
|
||||
Err(retry) => Err(retry.into_error()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_rerank_request(
|
||||
&self,
|
||||
request_body: &CohereRerankRequest,
|
||||
) -> Result<reqwest::Response, reqwest::Error> {
|
||||
self.client
|
||||
.post(COHERE_API_URL)
|
||||
.header("Authorization", format!("Bearer {}", self.api_key))
|
||||
.header("Content-Type", "application/json")
|
||||
.json(request_body)
|
||||
.send()
|
||||
.await
|
||||
}
|
||||
|
||||
async fn handle_response(
|
||||
&self,
|
||||
response_result: Result<reqwest::Response, reqwest::Error>,
|
||||
) -> Result<Vec<usize>, Retry> {
|
||||
let response = match response_result {
|
||||
Ok(r) => r,
|
||||
Err(e) if e.is_timeout() => {
|
||||
return Err(Retry::retry_later(PersonalizationError::Network(format!(
|
||||
"Request timeout: {}",
|
||||
e
|
||||
))));
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(Retry::retry_later(PersonalizationError::Network(format!(
|
||||
"Network error: {}",
|
||||
e
|
||||
))));
|
||||
}
|
||||
};
|
||||
|
||||
let status = response.status();
|
||||
let status_code = status.as_u16();
|
||||
|
||||
if status.is_success() {
|
||||
let rerank_response: CohereRerankResponse = match response.json().await {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return Err(Retry::retry_later(PersonalizationError::Parse(format!(
|
||||
"Failed to parse response: {}",
|
||||
e
|
||||
))));
|
||||
}
|
||||
};
|
||||
|
||||
// Extract indices from rerank results
|
||||
let indices: Vec<usize> =
|
||||
rerank_response.results.iter().map(|result| result.index as usize).collect();
|
||||
|
||||
return Ok(indices);
|
||||
}
|
||||
|
||||
// Handle error status codes
|
||||
let error_body = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
|
||||
|
||||
let retry = match status_code {
|
||||
401 => Retry::give_up(PersonalizationError::Unauthorized),
|
||||
429 => Retry::rate_limited(PersonalizationError::RateLimited),
|
||||
400 => Retry::give_up(PersonalizationError::BadRequest(error_body)),
|
||||
500..=599 => Retry::retry_later(PersonalizationError::InternalServerError(format!(
|
||||
"Status {}: {}",
|
||||
status_code, error_body
|
||||
))),
|
||||
402..=499 => Retry::give_up(PersonalizationError::Api(format!(
|
||||
"Status {}: {}",
|
||||
status_code, error_body
|
||||
))),
|
||||
_ => Retry::retry_later(PersonalizationError::Api(format!(
|
||||
"Unexpected status {}: {}",
|
||||
status_code, error_body
|
||||
))),
|
||||
};
|
||||
|
||||
Err(retry)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct CohereRerankRequest {
|
||||
query: String,
|
||||
documents: Vec<String>,
|
||||
model: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct CohereRerankResponse {
|
||||
results: Vec<CohereRerankResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct CohereRerankResult {
|
||||
index: u32,
|
||||
}
|
||||
|
||||
// Retry strategy similar to vector extraction
|
||||
struct Retry {
|
||||
error: PersonalizationError,
|
||||
strategy: RetryStrategy,
|
||||
}
|
||||
|
||||
enum RetryStrategy {
|
||||
GiveUp,
|
||||
Retry,
|
||||
RetryAfterRateLimit,
|
||||
}
|
||||
|
||||
impl Retry {
|
||||
fn give_up(error: PersonalizationError) -> Self {
|
||||
Self { error, strategy: RetryStrategy::GiveUp }
|
||||
}
|
||||
|
||||
fn retry_later(error: PersonalizationError) -> Self {
|
||||
Self { error, strategy: RetryStrategy::Retry }
|
||||
}
|
||||
|
||||
fn rate_limited(error: PersonalizationError) -> Self {
|
||||
Self { error, strategy: RetryStrategy::RetryAfterRateLimit }
|
||||
}
|
||||
|
||||
fn into_duration(self, attempt: u32) -> Result<Duration, PersonalizationError> {
|
||||
match self.strategy {
|
||||
RetryStrategy::GiveUp => Err(self.error),
|
||||
RetryStrategy::Retry => {
|
||||
// Exponential backoff: 10^attempt milliseconds
|
||||
Ok(Duration::from_millis((10u64).pow(attempt)))
|
||||
}
|
||||
RetryStrategy::RetryAfterRateLimit => {
|
||||
// Longer backoff for rate limits: 100ms + exponential
|
||||
Ok(Duration::from_millis(100 + (10u64).pow(attempt)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn into_error(self) -> PersonalizationError {
|
||||
self.error
|
||||
}
|
||||
}
|
||||
|
||||
pub enum PersonalizationService {
|
||||
Cohere(CohereService),
|
||||
Disabled,
|
||||
}
|
||||
|
||||
impl PersonalizationService {
|
||||
pub fn cohere(api_key: String) -> Self {
|
||||
// If the API key is empty, consider the personalization service as disabled
|
||||
if api_key.trim().is_empty() {
|
||||
Self::disabled()
|
||||
} else {
|
||||
Self::Cohere(CohereService::new(api_key))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn disabled() -> Self {
|
||||
debug!("Personalization service disabled");
|
||||
Self::Disabled
|
||||
}
|
||||
|
||||
pub async fn rerank_search_results(
|
||||
&self,
|
||||
search_result: SearchResult,
|
||||
personalize: &Personalize,
|
||||
query: Option<&str>,
|
||||
time_budget: TimeBudget,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
match self {
|
||||
Self::Cohere(cohere_service) => {
|
||||
cohere_service
|
||||
.rerank_search_results(search_result, personalize, query, time_budget)
|
||||
.await
|
||||
}
|
||||
Self::Disabled => Err(PersonalizationError::FeatureNotEnabled(
|
||||
index_scheduler::error::FeatureNotEnabledError {
|
||||
disabled_action: "reranking search results",
|
||||
feature: "personalization",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/866",
|
||||
},
|
||||
)
|
||||
.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
84
crates/meilisearch/src/routes/indexes/compact.rs
Normal file
84
crates/meilisearch/src/routes/indexes/compact.rs
Normal file
@@ -0,0 +1,84 @@
|
||||
use actix_web::web::{self, Data};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use tracing::debug;
|
||||
use utoipa::OpenApi;
|
||||
|
||||
use super::ActionPolicy;
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
paths(compact),
|
||||
tags(
|
||||
(
|
||||
name = "Compact an index",
|
||||
description = "The /compact route uses compacts the database to reorganize and make it smaller and more efficient.",
|
||||
external_docs(url = "https://www.meilisearch.com/docs/reference/api/compact"),
|
||||
),
|
||||
),
|
||||
)]
|
||||
pub struct CompactApi;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(SeqHandler(compact))));
|
||||
}
|
||||
|
||||
/// Compact an index
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "{indexUid}/compact",
|
||||
tag = "Compact an index",
|
||||
security(("Bearer" = ["search", "*"])),
|
||||
params(("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false)),
|
||||
responses(
|
||||
(status = ACCEPTED, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
|
||||
{
|
||||
"taskUid": 147,
|
||||
"indexUid": null,
|
||||
"status": "enqueued",
|
||||
"type": "documentDeletion",
|
||||
"enqueuedAt": "2024-08-08T17:05:55.791772Z"
|
||||
}
|
||||
)),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
"message": "The Authorization header is missing. It must use the bearer authorization method.",
|
||||
"code": "missing_authorization_header",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||
}
|
||||
)),
|
||||
)
|
||||
)]
|
||||
pub async fn compact(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_COMPACT }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
analytics.publish(IndexCompacted::default(), &req);
|
||||
|
||||
let task = KindWithContent::IndexCompaction { index_uid: index_uid.to_string() };
|
||||
let task =
|
||||
match tokio::task::spawn_blocking(move || index_scheduler.register(task, None, false))
|
||||
.await?
|
||||
{
|
||||
Ok(task) => task,
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
debug!(returns = ?task, "Compact the {index_uid} index");
|
||||
Ok(HttpResponse::Accepted().json(SummarizedTaskView::from(task)))
|
||||
}
|
||||
|
||||
crate::empty_analytics!(IndexCompacted, "Index Compacted");
|
||||
@@ -333,10 +333,12 @@ impl Aggregate for DocumentsDeletionAggregator {
|
||||
pub async fn delete_document(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
path: web::Path<DocumentParam>,
|
||||
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
let DocumentParam { index_uid, document_id } = path.into_inner();
|
||||
let index_uid = IndexUid::try_from(index_uid)?;
|
||||
let network = index_scheduler.network();
|
||||
@@ -359,7 +361,10 @@ pub async fn delete_document(
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
@@ -678,6 +683,19 @@ pub struct UpdateDocumentsQuery {
|
||||
#[param(value_type = char, default = ",", example = ";")]
|
||||
#[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidDocumentCsvDelimiter>, error = DeserrQueryParamError<InvalidDocumentCsvDelimiter>)]
|
||||
pub csv_delimiter: Option<u8>,
|
||||
|
||||
#[param(example = "custom")]
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidIndexCustomMetadata>)]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Deserr, IntoParams)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[into_params(parameter_in = Query, rename_all = "camelCase")]
|
||||
pub struct CustomMetadataQuery {
|
||||
#[param(example = "custom")]
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidIndexCustomMetadata>)]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
fn from_char_csv_delimiter(
|
||||
@@ -819,6 +837,7 @@ pub async fn replace_documents(
|
||||
body,
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
uid,
|
||||
params.custom_metadata,
|
||||
dry_run,
|
||||
allow_index_creation,
|
||||
&req,
|
||||
@@ -921,6 +940,7 @@ pub async fn update_documents(
|
||||
body,
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
uid,
|
||||
params.custom_metadata,
|
||||
dry_run,
|
||||
allow_index_creation,
|
||||
&req,
|
||||
@@ -940,6 +960,7 @@ async fn document_addition(
|
||||
body: Payload,
|
||||
method: IndexDocumentsMethod,
|
||||
task_id: Option<TaskId>,
|
||||
custom_metadata: Option<String>,
|
||||
dry_run: bool,
|
||||
allow_index_creation: bool,
|
||||
req: &HttpRequest,
|
||||
@@ -1065,8 +1086,10 @@ async fn document_addition(
|
||||
};
|
||||
|
||||
let scheduler = index_scheduler.clone();
|
||||
let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run))
|
||||
.await?
|
||||
let task = match tokio::task::spawn_blocking(move || {
|
||||
scheduler.register_with_custom_metadata(task, task_id, custom_metadata, dry_run)
|
||||
})
|
||||
.await?
|
||||
{
|
||||
Ok(task) => task,
|
||||
Err(e) => {
|
||||
@@ -1130,7 +1153,7 @@ async fn copy_body_to_file(
|
||||
/// Delete a set of documents based on an array of document ids.
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "{indexUid}/delete-batch",
|
||||
path = "{indexUid}/documents/delete-batch",
|
||||
tag = "Documents",
|
||||
security(("Bearer" = ["documents.delete", "documents.*", "*"])),
|
||||
params(
|
||||
@@ -1161,11 +1184,14 @@ pub async fn delete_documents_batch(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
body: web::Json<Vec<Value>>,
|
||||
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Delete documents by batch");
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let network = index_scheduler.network();
|
||||
|
||||
@@ -1190,7 +1216,10 @@ pub async fn delete_documents_batch(
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
@@ -1244,12 +1273,15 @@ pub struct DocumentDeletionByFilter {
|
||||
pub async fn delete_documents_by_filter(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
|
||||
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Delete documents by filter");
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let filter = body.into_inner();
|
||||
@@ -1282,7 +1314,10 @@ pub async fn delete_documents_by_filter(
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
@@ -1372,12 +1407,14 @@ impl Aggregate for EditDocumentsByFunctionAggregator {
|
||||
pub async fn edit_documents_by_function(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
|
||||
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
|
||||
body: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?params, "Edit documents by function");
|
||||
debug!(parameters = ?body, "Edit documents by function");
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
|
||||
index_scheduler
|
||||
.features()
|
||||
@@ -1387,23 +1424,23 @@ pub async fn edit_documents_by_function(
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let params = params.into_inner();
|
||||
let body = body.into_inner();
|
||||
|
||||
analytics.publish(
|
||||
EditDocumentsByFunctionAggregator {
|
||||
filtered: params.filter.is_some(),
|
||||
with_context: params.context.is_some(),
|
||||
filtered: body.filter.is_some(),
|
||||
with_context: body.context.is_some(),
|
||||
index_creation: index_scheduler.index(&index_uid).is_err(),
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
||||
let engine = milli::rhai::Engine::new();
|
||||
if let Err(e) = engine.compile(¶ms.function) {
|
||||
if let Err(e) = engine.compile(&body.function) {
|
||||
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
|
||||
}
|
||||
|
||||
if let Some(ref filter) = params.filter {
|
||||
if let Some(ref filter) = body.filter {
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
crate::search::parse_filter(
|
||||
filter,
|
||||
@@ -1414,8 +1451,8 @@ pub async fn edit_documents_by_function(
|
||||
}
|
||||
let task = KindWithContent::DocumentEdition {
|
||||
index_uid: index_uid.clone(),
|
||||
filter_expr: params.filter.clone(),
|
||||
context: match params.context.clone() {
|
||||
filter_expr: body.filter.clone(),
|
||||
context: match body.context.clone() {
|
||||
Some(Value::Object(m)) => Some(m),
|
||||
None => None,
|
||||
_ => {
|
||||
@@ -1425,18 +1462,21 @@ pub async fn edit_documents_by_function(
|
||||
))
|
||||
}
|
||||
},
|
||||
function: params.function.clone(),
|
||||
function: body.function.clone(),
|
||||
};
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(params), &task).await?;
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
@@ -1477,12 +1517,14 @@ pub async fn edit_documents_by_function(
|
||||
pub async fn clear_all_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let network = index_scheduler.network();
|
||||
let CustomMetadataQuery { custom_metadata } = params.into_inner();
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -1501,7 +1543,10 @@ pub async fn clear_all_documents(
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
tokio::task::spawn_blocking(move || {
|
||||
index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
|
||||
})
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
|
||||
@@ -343,6 +343,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::is_dry_run;
|
||||
use crate::Opt;
|
||||
|
||||
pub mod compact;
|
||||
pub mod documents;
|
||||
mod enterprise_edition;
|
||||
pub mod facet_search;
|
||||
@@ -49,8 +50,9 @@ pub use enterprise_edition::proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TAS
|
||||
(path = "/", api = facet_search::FacetSearchApi),
|
||||
(path = "/", api = similar::SimilarApi),
|
||||
(path = "/", api = settings::SettingsApi),
|
||||
(path = "/", api = compact::CompactApi),
|
||||
),
|
||||
paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats),
|
||||
paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats, compact::compact),
|
||||
tags(
|
||||
(
|
||||
name = "Indexes",
|
||||
@@ -80,7 +82,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::scope("/search").configure(search::configure))
|
||||
.service(web::scope("/facet-search").configure(facet_search::configure))
|
||||
.service(web::scope("/similar").configure(similar::configure))
|
||||
.service(web::scope("/settings").configure(settings::configure)),
|
||||
.service(web::scope("/settings").configure(settings::configure))
|
||||
.service(web::scope("/compact").configure(compact::configure)),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ use meilisearch_types::serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
use tracing::debug;
|
||||
use utoipa::{IntoParams, OpenApi};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::MeilisearchHttpError;
|
||||
@@ -21,11 +22,12 @@ use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
|
||||
use crate::routes::parse_include_metadata_header;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, Personalize,
|
||||
RankingScoreThreshold, RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult,
|
||||
SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@@ -132,6 +134,8 @@ pub struct SearchQueryGet {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
|
||||
#[param(value_type = Vec<Locale>, explode = false)]
|
||||
pub locales: Option<CS<Locale>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPersonalizeUserContext>)]
|
||||
pub personalize_user_context: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
@@ -203,6 +207,9 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
|
||||
));
|
||||
}
|
||||
|
||||
let personalize =
|
||||
other.personalize_user_context.map(|user_context| Personalize { user_context });
|
||||
|
||||
Ok(Self {
|
||||
q: other.q,
|
||||
// `media` not supported for `GET`
|
||||
@@ -232,6 +239,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
|
||||
hybrid,
|
||||
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
|
||||
locales: other.locales.map(|o| o.into_iter().collect()),
|
||||
personalize,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -320,12 +328,14 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
|
||||
pub async fn search_with_url_query(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
personalization_service: web::Data<crate::personalization::PersonalizationService>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?params, "Search get");
|
||||
let request_uid = Uuid::now_v7();
|
||||
debug!(request_uid = ?request_uid, parameters = ?params, "Search get");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let mut query: SearchQuery = params.into_inner().try_into()?;
|
||||
@@ -339,31 +349,56 @@ pub async fn search_with_url_query(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
|
||||
// Extract personalization and query string before moving query
|
||||
let personalize = query.personalize.take();
|
||||
|
||||
let search_kind =
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
// Save the query string for personalization if requested
|
||||
let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let include_metadata = parse_include_metadata_header(&req);
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid.to_string(),
|
||||
SearchParams {
|
||||
index_uid: index_uid.to_string(),
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors: retrieve_vector,
|
||||
features: index_scheduler.features(),
|
||||
request_uid,
|
||||
include_metadata,
|
||||
},
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vector,
|
||||
index_scheduler.features(),
|
||||
)
|
||||
})
|
||||
.await;
|
||||
permit.drop().await;
|
||||
let search_result = search_result?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
if let Ok((search_result, _)) = search_result.as_ref() {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
analytics.publish(aggregate, &req);
|
||||
|
||||
let search_result = search_result?;
|
||||
let (mut search_result, time_budget) = search_result?;
|
||||
|
||||
debug!(returns = ?search_result, "Search get");
|
||||
// Apply personalization if requested
|
||||
if let Some(personalize) = personalize.as_ref() {
|
||||
search_result = personalization_service
|
||||
.rerank_search_results(
|
||||
search_result,
|
||||
personalize,
|
||||
personalize_query.as_deref(),
|
||||
time_budget,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
debug!(request_uid = ?request_uid, returns = ?search_result, "Search get");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
@@ -426,15 +461,17 @@ pub async fn search_with_url_query(
|
||||
pub async fn search_with_post(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
personalization_service: web::Data<crate::personalization::PersonalizationService>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<SearchQuery, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let request_uid = Uuid::now_v7();
|
||||
|
||||
let mut query = params.into_inner();
|
||||
debug!(parameters = ?query, "Search post");
|
||||
debug!(request_uid = ?request_uid, parameters = ?query, "Search post");
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
@@ -445,25 +482,37 @@ pub async fn search_with_post(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
|
||||
// Extract personalization and query string before moving query
|
||||
let personalize = query.personalize.take();
|
||||
|
||||
let search_kind =
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
let include_metadata = parse_include_metadata_header(&req);
|
||||
|
||||
// Save the query string for personalization if requested
|
||||
let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid.to_string(),
|
||||
SearchParams {
|
||||
index_uid: index_uid.to_string(),
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors,
|
||||
features: index_scheduler.features(),
|
||||
request_uid,
|
||||
include_metadata,
|
||||
},
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
)
|
||||
})
|
||||
.await;
|
||||
permit.drop().await;
|
||||
let search_result = search_result?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
if let Ok((ref search_result, _)) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
if search_result.degraded {
|
||||
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
|
||||
@@ -471,9 +520,21 @@ pub async fn search_with_post(
|
||||
}
|
||||
analytics.publish(aggregate, &req);
|
||||
|
||||
let search_result = search_result?;
|
||||
let (mut search_result, time_budget) = search_result?;
|
||||
|
||||
debug!(returns = ?search_result, "Search post");
|
||||
// Apply personalization if requested
|
||||
if let Some(personalize) = personalize.as_ref() {
|
||||
search_result = personalization_service
|
||||
.rerank_search_results(
|
||||
search_result,
|
||||
personalize,
|
||||
personalize_query.as_deref(),
|
||||
time_budget,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
debug!(request_uid = ?request_uid, returns = ?search_result, "Search post");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ use serde_json::{json, Value};
|
||||
|
||||
use crate::aggregate_methods;
|
||||
use crate::analytics::{Aggregate, AggregateMethod};
|
||||
use crate::metrics::MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS;
|
||||
use crate::search::{
|
||||
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
@@ -95,6 +96,9 @@ pub struct SearchAggregator<Method: AggregateMethod> {
|
||||
show_ranking_score_details: bool,
|
||||
ranking_score_threshold: bool,
|
||||
|
||||
// personalization
|
||||
total_personalized: usize,
|
||||
|
||||
marker: std::marker::PhantomData<Method>,
|
||||
}
|
||||
|
||||
@@ -129,6 +133,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
@@ -204,6 +209,12 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
ret.locales = locales.iter().copied().collect();
|
||||
}
|
||||
|
||||
// personalization
|
||||
if personalize.is_some() {
|
||||
ret.total_personalized = 1;
|
||||
MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
|
||||
}
|
||||
|
||||
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
|
||||
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
|
||||
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
|
||||
@@ -234,6 +245,8 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
facet_stats: _,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
request_uid: _,
|
||||
metadata: _,
|
||||
} = result;
|
||||
|
||||
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
||||
@@ -294,6 +307,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
mut locales,
|
||||
total_personalized,
|
||||
marker: _,
|
||||
} = *new;
|
||||
|
||||
@@ -379,6 +393,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
// locales
|
||||
self.locales.append(&mut locales);
|
||||
|
||||
// personalization
|
||||
self.total_personalized = self.total_personalized.saturating_add(total_personalized);
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
@@ -424,6 +441,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
total_personalized,
|
||||
marker: _,
|
||||
} = *self;
|
||||
|
||||
@@ -497,6 +515,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
"show_ranking_score_details": show_ranking_score_details,
|
||||
"ranking_score_threshold": ranking_score_threshold,
|
||||
},
|
||||
"personalization": {
|
||||
"total_personalized": total_personalized,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -180,12 +180,6 @@ pub async fn get_metrics(
|
||||
|
||||
let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");
|
||||
|
||||
// We cannot specify the version with ContentType(TEXT_PLAIN_UTF_8) so we have to write everything by hand :(
|
||||
// see the following for what should be returned: https://prometheus.io/docs/instrumenting/content_negotiation/#content-type-response
|
||||
let content_type = ("content-type", "text/plain; version=0.0.4; charset=utf-8");
|
||||
|
||||
Ok(HttpResponse::Ok()
|
||||
// .insert_header(header::ContentType(mime::TEXT_PLAIN_UTF_8))
|
||||
.insert_header(content_type)
|
||||
.body(response))
|
||||
let content_type = ("content-type", prometheus::TEXT_FORMAT);
|
||||
Ok(HttpResponse::Ok().insert_header(content_type).body(response))
|
||||
}
|
||||
|
||||
@@ -41,10 +41,13 @@ use crate::routes::indexes::IndexView;
|
||||
use crate::routes::multi_search::SearchResults;
|
||||
use crate::routes::network::{Network, Remote};
|
||||
use crate::routes::swap_indexes::SwapIndexesPayload;
|
||||
use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetadata};
|
||||
use crate::routes::webhooks::{
|
||||
WebhookResults, WebhookSettings, WebhookWithMetadataRedactedAuthorization,
|
||||
};
|
||||
use crate::search::{
|
||||
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
|
||||
SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult,
|
||||
INCLUDE_METADATA_HEADER,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
use crate::Opt;
|
||||
@@ -102,7 +105,7 @@ mod webhooks;
|
||||
url = "/",
|
||||
description = "Local server",
|
||||
)),
|
||||
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadata, meilisearch_types::milli::vector::VectorStoreBackend))
|
||||
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadataRedactedAuthorization, meilisearch_types::milli::vector::VectorStoreBackend))
|
||||
)]
|
||||
pub struct MeilisearchApi;
|
||||
|
||||
@@ -184,6 +187,18 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
|
||||
.is_some_and(|s| s.to_lowercase() == "true"))
|
||||
}
|
||||
|
||||
/// Parse the `Meili-Include-Metadata` header from an HTTP request.
|
||||
///
|
||||
/// Returns `true` if the header is present and set to "true" or "1" (case-insensitive).
|
||||
/// Returns `false` if the header is not present or has any other value.
|
||||
pub fn parse_include_metadata_header(req: &HttpRequest) -> bool {
|
||||
req.headers()
|
||||
.get(INCLUDE_METADATA_HEADER)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.map(|v| matches!(v.to_lowercase().as_str(), "true" | "1"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SummarizedTaskView {
|
||||
@@ -203,6 +218,8 @@ pub struct SummarizedTaskView {
|
||||
deserialize_with = "time::serde::rfc3339::deserialize"
|
||||
)]
|
||||
enqueued_at: OffsetDateTime,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
custom_metadata: Option<String>,
|
||||
}
|
||||
|
||||
impl From<Task> for SummarizedTaskView {
|
||||
@@ -213,6 +230,7 @@ impl From<Task> for SummarizedTaskView {
|
||||
status: task.status,
|
||||
kind: task.kind.as_kind(),
|
||||
enqueued_at: task.enqueued_at,
|
||||
custom_metadata: task.custom_metadata,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ use meilisearch_types::keys::actions;
|
||||
use serde::Serialize;
|
||||
use tracing::debug;
|
||||
use utoipa::{OpenApi, ToSchema};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::multi_search_analytics::MultiSearchAggregator;
|
||||
use crate::analytics::Analytics;
|
||||
@@ -17,10 +18,11 @@ use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
use crate::routes::parse_include_metadata_header;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
|
||||
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
|
||||
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
||||
FederatedSearchResult, RetrieveVectors, SearchParams, SearchQueryWithIndex,
|
||||
SearchResultWithIndex, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@@ -144,6 +146,7 @@ pub struct SearchResults {
|
||||
pub async fn multi_search_with_post(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
search_queue: Data<SearchQueue>,
|
||||
personalization_service: web::Data<crate::personalization::PersonalizationService>,
|
||||
params: AwebJson<FederatedSearch, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<Analytics>,
|
||||
@@ -151,6 +154,7 @@ pub async fn multi_search_with_post(
|
||||
// Since we don't want to process half of the search requests and then get a permit refused
|
||||
// we're going to get one permit for the whole duration of the multi-search request.
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let request_uid = Uuid::now_v7();
|
||||
|
||||
let federated_search = params.into_inner();
|
||||
|
||||
@@ -186,16 +190,31 @@ pub async fn multi_search_with_post(
|
||||
err
|
||||
})?;
|
||||
|
||||
let include_metadata = parse_include_metadata_header(&req);
|
||||
let response = match federation {
|
||||
Some(federation) => {
|
||||
debug!(
|
||||
request_uid = ?request_uid,
|
||||
federation = ?federation,
|
||||
parameters = ?queries,
|
||||
"Federated-search"
|
||||
);
|
||||
|
||||
// check remote header
|
||||
let is_proxy = req
|
||||
.headers()
|
||||
.get(PROXY_SEARCH_HEADER)
|
||||
.is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes());
|
||||
let search_result =
|
||||
perform_federated_search(&index_scheduler, queries, federation, features, is_proxy)
|
||||
.await;
|
||||
let search_result = perform_federated_search(
|
||||
&index_scheduler,
|
||||
queries,
|
||||
federation,
|
||||
features,
|
||||
is_proxy,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
)
|
||||
.await;
|
||||
permit.drop().await;
|
||||
|
||||
if search_result.is_ok() {
|
||||
@@ -203,6 +222,13 @@ pub async fn multi_search_with_post(
|
||||
}
|
||||
|
||||
analytics.publish(multi_aggregate, &req);
|
||||
|
||||
debug!(
|
||||
request_uid = ?request_uid,
|
||||
returns = ?search_result,
|
||||
"Federated-search"
|
||||
);
|
||||
|
||||
HttpResponse::Ok().json(search_result?)
|
||||
}
|
||||
None => {
|
||||
@@ -211,12 +237,17 @@ pub async fn multi_search_with_post(
|
||||
// changes.
|
||||
let search_results: Result<_, (ResponseError, usize)> = async {
|
||||
let mut search_results = Vec::with_capacity(queries.len());
|
||||
for (query_index, (index_uid, query, federation_options)) in queries
|
||||
for (query_index, (index_uid, mut query, federation_options)) in queries
|
||||
.into_iter()
|
||||
.map(SearchQueryWithIndex::into_index_query_federation)
|
||||
.enumerate()
|
||||
{
|
||||
debug!(on_index = query_index, parameters = ?query, "Multi-search");
|
||||
debug!(
|
||||
request_uid = ?request_uid,
|
||||
on_index = query_index,
|
||||
parameters = ?query,
|
||||
"Multi-search"
|
||||
);
|
||||
|
||||
if federation_options.is_some() {
|
||||
return Err((
|
||||
@@ -239,6 +270,13 @@ pub async fn multi_search_with_post(
|
||||
})
|
||||
.with_index(query_index)?;
|
||||
|
||||
// Extract personalization and query string before moving query
|
||||
let personalize = query.personalize.take();
|
||||
|
||||
// Save the query string for personalization if requested
|
||||
let personalize_query =
|
||||
personalize.is_some().then(|| query.q.clone()).flatten();
|
||||
|
||||
let index_uid_str = index_uid.to_string();
|
||||
|
||||
let search_kind = search_kind(
|
||||
@@ -250,22 +288,40 @@ pub async fn multi_search_with_post(
|
||||
.with_index(query_index)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
let (mut search_result, time_budget) = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid_str.clone(),
|
||||
SearchParams {
|
||||
index_uid: index_uid_str.clone(),
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors: retrieve_vector,
|
||||
features,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
},
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vector,
|
||||
features,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.with_index(query_index)?
|
||||
.with_index(query_index)?;
|
||||
|
||||
// Apply personalization if requested
|
||||
if let Some(personalize) = personalize.as_ref() {
|
||||
search_result = personalization_service
|
||||
.rerank_search_results(
|
||||
search_result,
|
||||
personalize,
|
||||
personalize_query.as_deref(),
|
||||
time_budget,
|
||||
)
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
}
|
||||
|
||||
search_results.push(SearchResultWithIndex {
|
||||
index_uid: index_uid.into_inner(),
|
||||
result: search_result.with_index(query_index)?,
|
||||
result: search_result,
|
||||
});
|
||||
}
|
||||
Ok(search_results)
|
||||
@@ -286,7 +342,11 @@ pub async fn multi_search_with_post(
|
||||
err
|
||||
})?;
|
||||
|
||||
debug!(returns = ?search_results, "Multi-search");
|
||||
debug!(
|
||||
request_uid = ?request_uid,
|
||||
returns = ?search_results,
|
||||
"Multi-search"
|
||||
);
|
||||
|
||||
HttpResponse::Ok().json(SearchResults { results: search_results })
|
||||
}
|
||||
|
||||
@@ -67,6 +67,7 @@ impl MultiSearchAggregator {
|
||||
hybrid: _,
|
||||
ranking_score_threshold: _,
|
||||
locales: _,
|
||||
personalize: _,
|
||||
} in &federated_search.queries
|
||||
{
|
||||
if let Some(federation_options) = federation_options {
|
||||
|
||||
@@ -226,14 +226,14 @@ mod tests {
|
||||
{
|
||||
let params = "types=createIndex";
|
||||
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
|
||||
snapshot!(meili_snap::json_string!(err), @r#"
|
||||
snapshot!(meili_snap::json_string!(err), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
|
||||
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
}
|
||||
#[test]
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use core::convert::Infallible;
|
||||
use std::collections::BTreeMap;
|
||||
use std::str::FromStr;
|
||||
|
||||
@@ -7,7 +8,6 @@ use actix_http::header::{
|
||||
};
|
||||
use actix_web::web::{self, Data, Path};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use core::convert::Infallible;
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::{DeserializeError, Deserr, ValuePointerRef};
|
||||
use index_scheduler::IndexScheduler;
|
||||
@@ -24,12 +24,12 @@ use tracing::debug;
|
||||
use url::Url;
|
||||
use utoipa::{OpenApi, ToSchema};
|
||||
use uuid::Uuid;
|
||||
use WebhooksError::*;
|
||||
|
||||
use crate::analytics::{Aggregate, Analytics};
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use WebhooksError::*;
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
@@ -90,7 +90,7 @@ fn deny_immutable_fields_webhook(
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub(super) struct WebhookWithMetadata {
|
||||
pub(super) struct WebhookWithMetadataRedactedAuthorization {
|
||||
uuid: Uuid,
|
||||
is_editable: bool,
|
||||
#[schema(value_type = WebhookSettings)]
|
||||
@@ -98,8 +98,9 @@ pub(super) struct WebhookWithMetadata {
|
||||
webhook: Webhook,
|
||||
}
|
||||
|
||||
impl WebhookWithMetadata {
|
||||
pub fn from(uuid: Uuid, webhook: Webhook) -> Self {
|
||||
impl WebhookWithMetadataRedactedAuthorization {
|
||||
pub fn from(uuid: Uuid, mut webhook: Webhook) -> Self {
|
||||
webhook.redact_authorization_header();
|
||||
Self { uuid, is_editable: uuid != Uuid::nil(), webhook }
|
||||
}
|
||||
}
|
||||
@@ -107,7 +108,7 @@ impl WebhookWithMetadata {
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub(super) struct WebhookResults {
|
||||
results: Vec<WebhookWithMetadata>,
|
||||
results: Vec<WebhookWithMetadataRedactedAuthorization>,
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
@@ -150,7 +151,7 @@ async fn get_webhooks(
|
||||
let results = webhooks
|
||||
.webhooks
|
||||
.into_iter()
|
||||
.map(|(uuid, webhook)| WebhookWithMetadata::from(uuid, webhook))
|
||||
.map(|(uuid, webhook)| WebhookWithMetadataRedactedAuthorization::from(uuid, webhook))
|
||||
.collect::<Vec<_>>();
|
||||
let results = WebhookResults { results };
|
||||
|
||||
@@ -301,7 +302,7 @@ fn check_changed(uuid: Uuid, webhook: &Webhook) -> Result<(), WebhooksError> {
|
||||
tag = "Webhooks",
|
||||
security(("Bearer" = ["webhooks.get", "webhooks.*", "*.get", "*"])),
|
||||
responses(
|
||||
(status = 200, description = "Webhook found", body = WebhookWithMetadata, content_type = "application/json", example = json!({
|
||||
(status = 200, description = "Webhook found", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
|
||||
"uuid": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"url": "https://your.site/on-tasks-completed",
|
||||
"headers": {
|
||||
@@ -324,7 +325,7 @@ async fn get_webhook(
|
||||
let mut webhooks = index_scheduler.webhooks_view();
|
||||
|
||||
let webhook = webhooks.webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?;
|
||||
let webhook = WebhookWithMetadata::from(uuid, webhook);
|
||||
let webhook = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
|
||||
|
||||
debug!(returns = ?webhook, "Get webhook");
|
||||
Ok(HttpResponse::Ok().json(webhook))
|
||||
@@ -337,7 +338,7 @@ async fn get_webhook(
|
||||
request_body = WebhookSettings,
|
||||
security(("Bearer" = ["webhooks.create", "webhooks.*", "*"])),
|
||||
responses(
|
||||
(status = 201, description = "Webhook created successfully", body = WebhookWithMetadata, content_type = "application/json", example = json!({
|
||||
(status = 201, description = "Webhook created successfully", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
|
||||
"uuid": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"url": "https://your.site/on-tasks-completed",
|
||||
"headers": {
|
||||
@@ -383,7 +384,7 @@ async fn post_webhook(
|
||||
|
||||
analytics.publish(PostWebhooksAnalytics, &req);
|
||||
|
||||
let response = WebhookWithMetadata::from(uuid, webhook);
|
||||
let response = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
|
||||
debug!(returns = ?response, "Post webhook");
|
||||
Ok(HttpResponse::Created().json(response))
|
||||
}
|
||||
@@ -395,7 +396,7 @@ async fn post_webhook(
|
||||
request_body = WebhookSettings,
|
||||
security(("Bearer" = ["webhooks.update", "webhooks.*", "*"])),
|
||||
responses(
|
||||
(status = 200, description = "Webhook updated successfully", body = WebhookWithMetadata, content_type = "application/json", example = json!({
|
||||
(status = 200, description = "Webhook updated successfully", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
|
||||
"uuid": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"url": "https://your.site/on-tasks-completed",
|
||||
"headers": {
|
||||
@@ -435,7 +436,7 @@ async fn patch_webhook(
|
||||
|
||||
analytics.publish(PatchWebhooksAnalytics, &req);
|
||||
|
||||
let response = WebhookWithMetadata::from(uuid, webhook);
|
||||
let response = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
|
||||
debug!(returns = ?response, "Patch webhook");
|
||||
Ok(HttpResponse::Ok().json(response))
|
||||
}
|
||||
|
||||
@@ -17,11 +17,13 @@ use meilisearch_types::milli::vector::Embedding;
|
||||
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
|
||||
use roaring::RoaringBitmap;
|
||||
use tokio::task::JoinHandle;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::super::ranking_rules::{self, RankingRules};
|
||||
use super::super::{
|
||||
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, HitMaker,
|
||||
HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
|
||||
HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchMetadata, SearchQuery,
|
||||
SearchQueryWithIndex,
|
||||
};
|
||||
use super::proxy::{proxy_search, ProxySearchError, ProxySearchParams};
|
||||
use super::types::{
|
||||
@@ -39,32 +41,58 @@ pub async fn perform_federated_search(
|
||||
federation: Federation,
|
||||
features: RoFeatures,
|
||||
is_proxy: bool,
|
||||
request_uid: Uuid,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, ResponseError> {
|
||||
if is_proxy {
|
||||
features.check_network("Performing a remote federated search")?;
|
||||
}
|
||||
let before_search = std::time::Instant::now();
|
||||
let deadline = before_search + std::time::Duration::from_secs(9);
|
||||
|
||||
let timeout = std::env::var("MEILI_EXPERIMENTAL_REMOTE_SEARCH_TIMEOUT_SECONDS")
|
||||
.ok()
|
||||
.map(|p| p.parse().unwrap())
|
||||
.unwrap_or(25);
|
||||
|
||||
let deadline = before_search + std::time::Duration::from_secs(timeout);
|
||||
|
||||
let required_hit_count = federation.limit + federation.offset;
|
||||
let retrieve_vectors = queries.iter().any(|q| q.retrieve_vectors);
|
||||
|
||||
let network = index_scheduler.network();
|
||||
|
||||
// Preconstruct metadata keeping the original queries order for later metadata building
|
||||
let precomputed_query_metadata: Option<Vec<_>> = include_metadata.then(|| {
|
||||
queries
|
||||
.iter()
|
||||
.map(|q| {
|
||||
(
|
||||
q.index_uid.to_string(),
|
||||
q.federation_options.as_ref().and_then(|o| o.remote.clone()),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
|
||||
// this implementation partition the queries by index to guarantee an important property:
|
||||
// - all the queries to a particular index use the same read transaction.
|
||||
// This is an important property, otherwise we cannot guarantee the self-consistency of the results.
|
||||
|
||||
// 1. partition queries by host and index
|
||||
let mut partitioned_queries = PartitionedQueries::new();
|
||||
|
||||
for (query_index, federated_query) in queries.into_iter().enumerate() {
|
||||
partitioned_queries.partition(federated_query, query_index, &network, features)?
|
||||
}
|
||||
|
||||
// 2. perform queries, merge and make hits index by index
|
||||
// 2.1. start remote queries
|
||||
let remote_search =
|
||||
RemoteSearch::start(partitioned_queries.remote_queries_by_host, &federation, deadline);
|
||||
let remote_search = RemoteSearch::start(
|
||||
partitioned_queries.remote_queries_by_host,
|
||||
&federation,
|
||||
deadline,
|
||||
include_metadata,
|
||||
);
|
||||
|
||||
// 2.2. concurrently execute local queries
|
||||
let params = SearchByIndexParams {
|
||||
@@ -106,11 +134,25 @@ pub async fn perform_federated_search(
|
||||
let after_waiting_remote_results = std::time::Instant::now();
|
||||
|
||||
// 3. merge hits and metadata across indexes and hosts
|
||||
// 3.1. merge metadata
|
||||
|
||||
// 3.1. Build metadata in the same order as the original queries
|
||||
let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| {
|
||||
// If a remote is present, set the local remote name
|
||||
let local_remote_name = network.local.clone().filter(|_| partitioned_queries.has_remote);
|
||||
|
||||
build_query_metadata(
|
||||
precomputed_query_metadata,
|
||||
local_remote_name,
|
||||
&remote_results,
|
||||
&results_by_index,
|
||||
)
|
||||
});
|
||||
|
||||
// 3.2. merge federation metadata
|
||||
let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) =
|
||||
merge_metadata(&mut results_by_index, &remote_results);
|
||||
|
||||
// 3.2. merge hits
|
||||
// 3.3. merge hits
|
||||
let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
|
||||
.skip(federation.offset)
|
||||
.take(federation.limit)
|
||||
@@ -125,7 +167,7 @@ pub async fn perform_federated_search(
|
||||
.map(|hit| hit.hit())
|
||||
.collect();
|
||||
|
||||
// 3.3. merge query vectors
|
||||
// 3.4. merge query vectors
|
||||
let query_vectors = if retrieve_vectors {
|
||||
for remote_results in remote_results.iter_mut() {
|
||||
if let Some(remote_vectors) = remote_results.query_vectors.take() {
|
||||
@@ -144,7 +186,7 @@ pub async fn perform_federated_search(
|
||||
None
|
||||
};
|
||||
|
||||
// 3.4. merge facets
|
||||
// 3.5. merge facets
|
||||
let (facet_distribution, facet_stats, facets_by_index) =
|
||||
facet_order.merge(federation.merge_facets, remote_results, facets);
|
||||
|
||||
@@ -170,6 +212,8 @@ pub async fn perform_federated_search(
|
||||
facet_stats,
|
||||
facets_by_index,
|
||||
remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
|
||||
request_uid: Some(request_uid),
|
||||
metadata: query_metadata,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -219,7 +263,7 @@ struct SearchResultByQueryIterItem<'a> {
|
||||
|
||||
fn merge_index_local_results(
|
||||
results_by_query: Vec<SearchResultByQuery<'_>>,
|
||||
) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
|
||||
) -> impl Iterator<Item = SearchResultByQueryIterItem<'_>> + '_ {
|
||||
itertools::kmerge_by(
|
||||
results_by_query.into_iter().map(SearchResultByQueryIter::new),
|
||||
|left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
|
||||
@@ -393,6 +437,7 @@ struct SearchHitByIndex {
|
||||
|
||||
struct SearchResultByIndex {
|
||||
index: String,
|
||||
primary_key: Option<String>,
|
||||
hits: Vec<SearchHitByIndex>,
|
||||
estimated_total_hits: usize,
|
||||
degraded: bool,
|
||||
@@ -400,6 +445,61 @@ struct SearchResultByIndex {
|
||||
facets: Option<ComputedFacets>,
|
||||
}
|
||||
|
||||
/// Builds query metadata for federated search results.
|
||||
///
|
||||
/// This function creates metadata for each query in the same order as the original queries,
|
||||
/// combining information from both local and remote search results. It handles the mapping
|
||||
/// of primary keys to their respective indexes and remotes to prevent collisions when
|
||||
/// multiple remotes have the same index_uid but different primary keys.
|
||||
fn build_query_metadata(
|
||||
precomputed_query_metadata: Vec<(String, Option<String>)>,
|
||||
local_remote_name: Option<String>,
|
||||
remote_results: &[FederatedSearchResult],
|
||||
results_by_index: &[SearchResultByIndex],
|
||||
) -> Vec<SearchMetadata> {
|
||||
// Create a map of (remote, index_uid) -> primary_key for quick lookup
|
||||
// This prevents collisions when multiple remotes have the same index_uid but different primary keys
|
||||
let mut primary_key_per_index = std::collections::HashMap::new();
|
||||
|
||||
// Build metadata for remote results
|
||||
for remote_result in remote_results {
|
||||
if let Some(remote_metadata) = &remote_result.metadata {
|
||||
for remote_meta in remote_metadata {
|
||||
if let SearchMetadata {
|
||||
remote: Some(remote_name),
|
||||
index_uid,
|
||||
primary_key: Some(primary_key),
|
||||
..
|
||||
} = remote_meta
|
||||
{
|
||||
let key = (Some(remote_name), index_uid);
|
||||
primary_key_per_index.insert(key, primary_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build metadata for local results
|
||||
for local_meta in results_by_index {
|
||||
if let SearchResultByIndex { index, primary_key: Some(primary_key), .. } = local_meta {
|
||||
let key = (None, index);
|
||||
primary_key_per_index.insert(key, primary_key);
|
||||
}
|
||||
}
|
||||
|
||||
// Build metadata in the same order as the original queries
|
||||
let mut query_metadata = Vec::new();
|
||||
for (index_uid, remote) in precomputed_query_metadata {
|
||||
let primary_key =
|
||||
primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string());
|
||||
let query_uid = Uuid::now_v7();
|
||||
// if the remote is not set, use the local remote name
|
||||
let remote = remote.or_else(|| local_remote_name.clone());
|
||||
query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote });
|
||||
}
|
||||
query_metadata
|
||||
}
|
||||
|
||||
fn merge_metadata(
|
||||
results_by_index: &mut Vec<SearchResultByIndex>,
|
||||
remote_results: &Vec<FederatedSearchResult>,
|
||||
@@ -411,6 +511,7 @@ fn merge_metadata(
|
||||
let mut max_remote_duration = Duration::ZERO;
|
||||
for SearchResultByIndex {
|
||||
index,
|
||||
primary_key: _,
|
||||
hits: _,
|
||||
estimated_total_hits: estimated_total_hits_by_index,
|
||||
facets: facets_by_index,
|
||||
@@ -439,6 +540,8 @@ fn merge_metadata(
|
||||
degraded: degraded_for_host,
|
||||
used_negative_operator: host_used_negative_operator,
|
||||
remote_errors: _,
|
||||
metadata: _,
|
||||
request_uid: _,
|
||||
} in remote_results
|
||||
{
|
||||
let this_remote_duration = Duration::from_millis(*processing_time_ms as u64);
|
||||
@@ -498,6 +601,10 @@ impl PartitionedQueries {
|
||||
.into());
|
||||
}
|
||||
|
||||
if federated_query.has_personalize() {
|
||||
return Err(MeilisearchHttpError::PersonalizationInFederatedQuery(query_index).into());
|
||||
}
|
||||
|
||||
let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
|
||||
|
||||
let federation_options = federation_options.unwrap_or_default();
|
||||
@@ -566,7 +673,12 @@ struct RemoteSearch {
|
||||
}
|
||||
|
||||
impl RemoteSearch {
|
||||
fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self {
|
||||
fn start(
|
||||
queries: RemoteQueriesByHost,
|
||||
federation: &Federation,
|
||||
deadline: Instant,
|
||||
include_metadata: bool,
|
||||
) -> Self {
|
||||
let mut in_flight_remote_queries = BTreeMap::new();
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_millis(200))
|
||||
@@ -586,7 +698,10 @@ impl RemoteSearch {
|
||||
// never merge distant facets
|
||||
proxy_federation.merge_facets = None;
|
||||
let params = params.clone();
|
||||
async move { proxy_search(&node, queries, proxy_federation, ¶ms).await }
|
||||
async move {
|
||||
proxy_search(&node, queries, proxy_federation, ¶ms, include_metadata)
|
||||
.await
|
||||
}
|
||||
}),
|
||||
);
|
||||
}
|
||||
@@ -630,6 +745,13 @@ impl RemoteSearch {
|
||||
continue 'remote_queries;
|
||||
}
|
||||
|
||||
// Add remote name to metadata
|
||||
if let Some(metadata) = res.metadata.as_mut() {
|
||||
for meta in metadata {
|
||||
meta.remote = Some(node_name.clone());
|
||||
}
|
||||
}
|
||||
|
||||
federation.insert(
|
||||
FEDERATION_REMOTE.to_string(),
|
||||
serde_json::Value::String(node_name.clone()),
|
||||
@@ -725,6 +847,7 @@ impl SearchByIndex {
|
||||
}
|
||||
};
|
||||
let rtxn = index.read_txn()?;
|
||||
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
let criteria = index.criteria(&rtxn)?;
|
||||
let dictionary = index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
@@ -751,6 +874,12 @@ impl SearchByIndex {
|
||||
return Err(error);
|
||||
}
|
||||
let mut results_by_query = Vec::with_capacity(queries.len());
|
||||
|
||||
// all queries for an index share the same budget
|
||||
let time_budget = match cutoff {
|
||||
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||
None => TimeBudget::default(),
|
||||
};
|
||||
for QueryByIndex { query, weight, query_index } in queries {
|
||||
// use an immediately invoked lambda to capture the result without returning from the function
|
||||
|
||||
@@ -820,17 +949,13 @@ impl SearchByIndex {
|
||||
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
let time_budget = match cutoff {
|
||||
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||
None => TimeBudget::default(),
|
||||
};
|
||||
|
||||
let (mut search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search(
|
||||
&index,
|
||||
&rtxn,
|
||||
&query,
|
||||
&search_kind,
|
||||
time_budget,
|
||||
// clones of `TimeBudget` share the budget rather than restart it
|
||||
time_budget.clone(),
|
||||
params.features,
|
||||
)?;
|
||||
|
||||
@@ -977,6 +1102,7 @@ impl SearchByIndex {
|
||||
})?;
|
||||
self.results_by_index.push(SearchResultByIndex {
|
||||
index: index_uid,
|
||||
primary_key,
|
||||
hits: merged_result,
|
||||
estimated_total_hits,
|
||||
degraded,
|
||||
|
||||
@@ -7,7 +7,7 @@ use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::types::{FederatedSearch, FederatedSearchResult, Federation};
|
||||
use crate::search::SearchQueryWithIndex;
|
||||
use crate::search::{SearchQueryWithIndex, INCLUDE_METADATA_HEADER};
|
||||
|
||||
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
|
||||
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
|
||||
@@ -98,6 +98,7 @@ pub async fn proxy_search(
|
||||
queries: Vec<SearchQueryWithIndex>,
|
||||
federation: Federation,
|
||||
params: &ProxySearchParams,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, ProxySearchError> {
|
||||
let url = format!("{}/multi-search", node.url);
|
||||
|
||||
@@ -105,7 +106,12 @@ pub async fn proxy_search(
|
||||
|
||||
let search_api_key = node.search_api_key.as_deref();
|
||||
|
||||
let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
|
||||
let timeout = std::env::var("MEILI_EXPERIMENTAL_REMOTE_SEARCH_TIMEOUT_SECONDS")
|
||||
.ok()
|
||||
.map(|p| p.parse().unwrap())
|
||||
.unwrap_or(25);
|
||||
|
||||
let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout);
|
||||
|
||||
let deadline = if let Some(deadline) = params.deadline {
|
||||
std::time::Instant::min(deadline, max_deadline)
|
||||
@@ -114,7 +120,16 @@ pub async fn proxy_search(
|
||||
};
|
||||
|
||||
for i in 0..params.try_count {
|
||||
match try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline).await {
|
||||
match try_proxy_search(
|
||||
&url,
|
||||
search_api_key,
|
||||
&federated,
|
||||
¶ms.client,
|
||||
deadline,
|
||||
include_metadata,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(response) => return Ok(response),
|
||||
Err(retry) => {
|
||||
let duration = retry.into_duration(i)?;
|
||||
@@ -122,7 +137,7 @@ pub async fn proxy_search(
|
||||
}
|
||||
}
|
||||
}
|
||||
try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline)
|
||||
try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline, include_metadata)
|
||||
.await
|
||||
.map_err(Retry::into_error)
|
||||
}
|
||||
@@ -133,6 +148,7 @@ async fn try_proxy_search(
|
||||
federated: &FederatedSearch,
|
||||
client: &Client,
|
||||
deadline: std::time::Instant,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, Retry> {
|
||||
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
|
||||
|
||||
@@ -143,6 +159,8 @@ async fn try_proxy_search(
|
||||
request
|
||||
};
|
||||
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
|
||||
let request =
|
||||
if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request };
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
|
||||
@@ -16,6 +16,9 @@ use meilisearch_types::milli::order_by_map::OrderByMap;
|
||||
use meilisearch_types::milli::OrderBy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::search::SearchMetadata;
|
||||
|
||||
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
|
||||
use crate::milli::vector::Embedding;
|
||||
@@ -131,6 +134,10 @@ pub struct FederatedSearchResult {
|
||||
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
||||
#[serde(default, skip_serializing_if = "FederatedFacets::is_empty")]
|
||||
pub facets_by_index: FederatedFacets,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub request_uid: Option<Uuid>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub metadata: Option<Vec<SearchMetadata>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub remote_errors: Option<BTreeMap<String, ResponseError>>,
|
||||
@@ -156,6 +163,8 @@ impl fmt::Debug for FederatedSearchResult {
|
||||
facet_stats,
|
||||
facets_by_index,
|
||||
remote_errors,
|
||||
request_uid,
|
||||
metadata,
|
||||
} = self;
|
||||
|
||||
let mut debug = f.debug_struct("SearchResult");
|
||||
@@ -188,6 +197,12 @@ impl fmt::Debug for FederatedSearchResult {
|
||||
if let Some(remote_errors) = remote_errors {
|
||||
debug.field("remote_errors", &remote_errors);
|
||||
}
|
||||
if let Some(request_uid) = request_uid {
|
||||
debug.field("request_uid", &request_uid);
|
||||
}
|
||||
if let Some(metadata) = metadata {
|
||||
debug.field("metadata", &metadata);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@ use serde_json::{json, Value};
|
||||
#[cfg(test)]
|
||||
mod mod_test;
|
||||
use utoipa::ToSchema;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
|
||||
@@ -56,6 +57,14 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
||||
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
|
||||
pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";
|
||||
|
||||
#[derive(Clone, Default, PartialEq, Deserr, ToSchema, Debug)]
|
||||
#[deserr(error = DeserrJsonError<InvalidSearchPersonalize>, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct Personalize {
|
||||
#[deserr(error = DeserrJsonError<InvalidSearchPersonalizeUserContext>)]
|
||||
pub user_context: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
@@ -120,6 +129,8 @@ pub struct SearchQuery {
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>)]
|
||||
pub locales: Option<Vec<Locale>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
|
||||
pub personalize: Option<Personalize>,
|
||||
}
|
||||
|
||||
impl From<SearchParameters> for SearchQuery {
|
||||
@@ -167,6 +178,7 @@ impl From<SearchParameters> for SearchQuery {
|
||||
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
|
||||
crop_marker: DEFAULT_CROP_MARKER(),
|
||||
locales: None,
|
||||
personalize: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -248,6 +260,7 @@ impl fmt::Debug for SearchQuery {
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = self;
|
||||
|
||||
let mut debug = f.debug_struct("SearchQuery");
|
||||
@@ -336,6 +349,10 @@ impl fmt::Debug for SearchQuery {
|
||||
debug.field("locales", &locales);
|
||||
}
|
||||
|
||||
if let Some(personalize) = personalize {
|
||||
debug.field("personalize", &personalize);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
}
|
||||
@@ -541,6 +558,9 @@ pub struct SearchQueryWithIndex {
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
|
||||
pub locales: Option<Vec<Locale>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
|
||||
#[serde(skip)]
|
||||
pub personalize: Option<Personalize>,
|
||||
|
||||
#[deserr(default)]
|
||||
pub federation_options: Option<FederationOptions>,
|
||||
@@ -565,6 +585,10 @@ impl SearchQueryWithIndex {
|
||||
self.facets.as_deref().filter(|v| !v.is_empty())
|
||||
}
|
||||
|
||||
pub fn has_personalize(&self) -> bool {
|
||||
self.personalize.is_some()
|
||||
}
|
||||
|
||||
pub fn from_index_query_federation(
|
||||
index_uid: IndexUid,
|
||||
query: SearchQuery,
|
||||
@@ -598,6 +622,7 @@ impl SearchQueryWithIndex {
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = query;
|
||||
|
||||
SearchQueryWithIndex {
|
||||
@@ -629,6 +654,7 @@ impl SearchQueryWithIndex {
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
federation_options,
|
||||
}
|
||||
}
|
||||
@@ -664,6 +690,7 @@ impl SearchQueryWithIndex {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = self;
|
||||
(
|
||||
index_uid,
|
||||
@@ -695,6 +722,7 @@ impl SearchQueryWithIndex {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
// do not use ..Default::default() here,
|
||||
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
||||
},
|
||||
@@ -835,6 +863,18 @@ pub struct SearchHit {
|
||||
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct SearchMetadata {
|
||||
pub query_uid: Uuid,
|
||||
pub index_uid: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub primary_key: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub remote: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, PartialEq, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
@@ -851,6 +891,10 @@ pub struct SearchResult {
|
||||
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub request_uid: Option<Uuid>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub metadata: Option<SearchMetadata>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub semantic_hit_count: Option<u32>,
|
||||
@@ -872,6 +916,8 @@ impl fmt::Debug for SearchResult {
|
||||
hits_info,
|
||||
facet_distribution,
|
||||
facet_stats,
|
||||
request_uid,
|
||||
metadata,
|
||||
semantic_hit_count,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
@@ -901,6 +947,12 @@ impl fmt::Debug for SearchResult {
|
||||
if let Some(semantic_hit_count) = semantic_hit_count {
|
||||
debug.field("semantic_hit_count", &semantic_hit_count);
|
||||
}
|
||||
if let Some(request_uid) = request_uid {
|
||||
debug.field("request_uid", &request_uid);
|
||||
}
|
||||
if let Some(metadata) = metadata {
|
||||
debug.field("metadata", &metadata);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
@@ -1113,15 +1165,31 @@ pub fn prepare_search<'t>(
|
||||
Ok((search, is_finite_pagination, max_total_hits, offset))
|
||||
}
|
||||
|
||||
pub struct SearchParams {
|
||||
pub index_uid: String,
|
||||
pub query: SearchQuery,
|
||||
pub search_kind: SearchKind,
|
||||
pub retrieve_vectors: RetrieveVectors,
|
||||
pub features: RoFeatures,
|
||||
pub request_uid: Uuid,
|
||||
pub include_metadata: bool,
|
||||
}
|
||||
|
||||
pub fn perform_search(
|
||||
index_uid: String,
|
||||
params: SearchParams,
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
search_kind: SearchKind,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
features: RoFeatures,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
) -> Result<(SearchResult, TimeBudget), ResponseError> {
|
||||
let SearchParams {
|
||||
index_uid,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors,
|
||||
features,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
} = params;
|
||||
let before_search = Instant::now();
|
||||
let index_uid_for_metadata = index_uid.clone();
|
||||
let rtxn = index.read_txn()?;
|
||||
let time_budget = match index.search_cutoff(&rtxn)? {
|
||||
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||
@@ -1129,7 +1197,7 @@ pub fn perform_search(
|
||||
};
|
||||
|
||||
let (search, is_finite_pagination, max_total_hits, offset) =
|
||||
prepare_search(index, &rtxn, &query, &search_kind, time_budget, features)?;
|
||||
prepare_search(index, &rtxn, &query, &search_kind, time_budget.clone(), features)?;
|
||||
|
||||
let (
|
||||
milli::SearchResult {
|
||||
@@ -1142,7 +1210,20 @@ pub fn perform_search(
|
||||
query_vector,
|
||||
},
|
||||
semantic_hit_count,
|
||||
) = search_from_kind(index_uid, search_kind, search)?;
|
||||
) = search_from_kind(index_uid.clone(), search_kind, search)?;
|
||||
|
||||
let metadata = if include_metadata {
|
||||
let query_uid = Uuid::now_v7();
|
||||
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
Some(SearchMetadata {
|
||||
query_uid,
|
||||
index_uid: index_uid_for_metadata,
|
||||
primary_key,
|
||||
remote: None, // Local searches don't have a remote
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let SearchQuery {
|
||||
q,
|
||||
@@ -1174,6 +1255,7 @@ pub fn perform_search(
|
||||
attributes_to_search_on: _,
|
||||
filter: _,
|
||||
distinct: _,
|
||||
personalize: _,
|
||||
} = query;
|
||||
|
||||
let format = AttributesFormat {
|
||||
@@ -1225,7 +1307,6 @@ pub fn perform_search(
|
||||
.transpose()?
|
||||
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
|
||||
.unzip();
|
||||
|
||||
let result = SearchResult {
|
||||
hits: documents,
|
||||
hits_info,
|
||||
@@ -1237,8 +1318,10 @@ pub fn perform_search(
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
semantic_hit_count,
|
||||
request_uid: Some(request_uid),
|
||||
metadata,
|
||||
};
|
||||
Ok(result)
|
||||
Ok((result, time_budget))
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
||||
@@ -2080,7 +2163,7 @@ pub(crate) fn parse_filter(
|
||||
facets: &Value,
|
||||
filter_parsing_error_code: Code,
|
||||
features: RoFeatures,
|
||||
) -> Result<Option<Filter>, ResponseError> {
|
||||
) -> Result<Option<Filter<'_>>, ResponseError> {
|
||||
let filter = match facets {
|
||||
Value::String(expr) => Filter::from_str(expr).map_err(|e| e.into()),
|
||||
Value::Array(arr) => parse_filter_array(arr).map_err(|e| e.into()),
|
||||
@@ -2117,7 +2200,7 @@ pub(crate) fn parse_filter(
|
||||
Ok(filter)
|
||||
}
|
||||
|
||||
fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> {
|
||||
fn parse_filter_array(arr: &'_ [Value]) -> Result<Option<Filter<'_>>, MeilisearchHttpError> {
|
||||
let mut ands = Vec::new();
|
||||
for value in arr {
|
||||
match value {
|
||||
|
||||
@@ -13,9 +13,9 @@
|
||||
//! What is going to happen at this point is that you're going to send a oneshot::Sender over an async mpsc channel.
|
||||
//! Then, the queue/scheduler is going to either:
|
||||
//! - Drop your oneshot channel => that means there are too many searches going on, and yours won't be executed.
|
||||
//! You should exit and free all the RAM you use ASAP.
|
||||
//! You should exit and free all the RAM you use ASAP.
|
||||
//! - Sends you a Permit => that will unlock the method, and you will be able to process your search.
|
||||
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
|
||||
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
|
||||
|
||||
use std::num::NonZeroUsize;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
@@ -419,14 +419,14 @@ async fn error_add_api_key_invalid_parameters_actions() {
|
||||
let (response, code) = server.add_api_key(content).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r#"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
|
||||
{
|
||||
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`",
|
||||
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`, `indexes.compact`",
|
||||
"code": "invalid_api_key_actions",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -91,14 +91,14 @@ async fn create_api_key_bad_actions() {
|
||||
// can't parse
|
||||
let (response, code) = server.add_api_key(json!({ "actions": ["doggo"] })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r#"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`",
|
||||
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`, `indexes.compact`",
|
||||
"code": "invalid_api_key_actions",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -1040,7 +1040,7 @@ async fn error_single_search_forbidden_token() {
|
||||
];
|
||||
|
||||
let failed_query_indexes: Vec<_> =
|
||||
std::iter::repeat(Some(0)).take(5).chain(std::iter::repeat(None).take(6)).collect();
|
||||
std::iter::repeat_n(Some(0), 5).chain(std::iter::repeat_n(None, 6)).collect();
|
||||
|
||||
let failed_query_indexes = vec![failed_query_indexes; ACCEPTED_KEYS_SINGLE.len()];
|
||||
|
||||
@@ -1118,10 +1118,9 @@ async fn error_multi_search_forbidden_token() {
|
||||
},
|
||||
];
|
||||
|
||||
let failed_query_indexes: Vec<_> = std::iter::repeat(Some(0))
|
||||
.take(5)
|
||||
.chain(std::iter::repeat(Some(1)).take(5))
|
||||
.chain(std::iter::repeat(None).take(6))
|
||||
let failed_query_indexes: Vec<_> = std::iter::repeat_n(Some(0), 5)
|
||||
.chain(std::iter::repeat_n(Some(1), 5))
|
||||
.chain(std::iter::repeat_n(None, 6))
|
||||
.collect();
|
||||
|
||||
let failed_query_indexes = vec![failed_query_indexes; ACCEPTED_KEYS_BOTH.len()];
|
||||
|
||||
@@ -40,14 +40,14 @@ async fn batch_bad_types() {
|
||||
|
||||
let (response, code) = server.batches_filter("types=doggo").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r#"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -91,7 +91,16 @@ impl<'a> Index<'a, Owned> {
|
||||
documents: Value,
|
||||
primary_key: Option<&str>,
|
||||
) -> (Value, StatusCode) {
|
||||
self._add_documents(documents, primary_key).await
|
||||
self._add_documents(documents, primary_key, None).await
|
||||
}
|
||||
|
||||
pub async fn add_documents_with_custom_metadata(
|
||||
&self,
|
||||
documents: Value,
|
||||
primary_key: Option<&str>,
|
||||
custom_metadata: Option<&str>,
|
||||
) -> (Value, StatusCode) {
|
||||
self._add_documents(documents, primary_key, custom_metadata).await
|
||||
}
|
||||
|
||||
pub async fn raw_add_documents(
|
||||
@@ -352,12 +361,25 @@ impl<State> Index<'_, State> {
|
||||
&self,
|
||||
documents: Value,
|
||||
primary_key: Option<&str>,
|
||||
custom_metadata: Option<&str>,
|
||||
) -> (Value, StatusCode) {
|
||||
let url = match primary_key {
|
||||
Some(key) => {
|
||||
format!("/indexes/{}/documents?primaryKey={}", urlencode(self.uid.as_ref()), key)
|
||||
let url = match (primary_key, custom_metadata) {
|
||||
(Some(key), Some(meta)) => {
|
||||
format!(
|
||||
"/indexes/{}/documents?primaryKey={key}&customMetadata={meta}",
|
||||
urlencode(self.uid.as_ref()),
|
||||
)
|
||||
}
|
||||
None => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())),
|
||||
(None, Some(meta)) => {
|
||||
format!(
|
||||
"/indexes/{}/documents?&customMetadata={meta}",
|
||||
urlencode(self.uid.as_ref()),
|
||||
)
|
||||
}
|
||||
(Some(key), None) => {
|
||||
format!("/indexes/{}/documents?&primaryKey={key}", urlencode(self.uid.as_ref()),)
|
||||
}
|
||||
(None, None) => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())),
|
||||
};
|
||||
self.service.post_encoded(url, documents, self.encoder).await
|
||||
}
|
||||
@@ -516,6 +538,18 @@ impl<State> Index<'_, State> {
|
||||
self.service.post_encoded(url, query, self.encoder).await
|
||||
}
|
||||
|
||||
pub async fn search_with_headers(
|
||||
&self,
|
||||
query: Value,
|
||||
headers: Vec<(&str, &str)>,
|
||||
) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/search", urlencode(self.uid.as_ref()));
|
||||
let body = serde_json::to_string(&query).unwrap();
|
||||
let mut all_headers = vec![("content-type", "application/json")];
|
||||
all_headers.extend(headers);
|
||||
self.service.post_str(url, body, all_headers).await
|
||||
}
|
||||
|
||||
pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query);
|
||||
self.service.get(url).await
|
||||
|
||||
@@ -241,7 +241,7 @@ pub async fn shared_index_with_documents() -> &'static Index<'static, Shared> {
|
||||
let server = Server::new_shared();
|
||||
let index = server._index("SHARED_DOCUMENTS").to_shared();
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (response, _code) = index._add_documents(documents, None).await;
|
||||
let (response, _code) = index._add_documents(documents, None, None).await;
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
let (response, _code) = index
|
||||
._update_settings(
|
||||
@@ -284,7 +284,7 @@ pub async fn shared_index_with_score_documents() -> &'static Index<'static, Shar
|
||||
let server = Server::new_shared();
|
||||
let index = server._index("SHARED_SCORE_DOCUMENTS").to_shared();
|
||||
let documents = SCORE_DOCUMENTS.clone();
|
||||
let (response, _code) = index._add_documents(documents, None).await;
|
||||
let (response, _code) = index._add_documents(documents, None, None).await;
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
let (response, _code) = index
|
||||
._update_settings(
|
||||
@@ -361,7 +361,7 @@ pub async fn shared_index_with_nested_documents() -> &'static Index<'static, Sha
|
||||
let server = Server::new_shared();
|
||||
let index = server._index("SHARED_NESTED_DOCUMENTS").to_shared();
|
||||
let documents = NESTED_DOCUMENTS.clone();
|
||||
let (response, _code) = index._add_documents(documents, None).await;
|
||||
let (response, _code) = index._add_documents(documents, None, None).await;
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
let (response, _code) = index
|
||||
._update_settings(
|
||||
@@ -508,7 +508,7 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
|
||||
.get_or_init(|| async {
|
||||
let server = Server::new_shared();
|
||||
let index = server._index("SHARED_GEO_DOCUMENTS").to_shared();
|
||||
let (response, _code) = index._add_documents(GEO_DOCUMENTS.clone(), None).await;
|
||||
let (response, _code) = index._add_documents(GEO_DOCUMENTS.clone(), None, None).await;
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (response, _code) = index
|
||||
@@ -522,6 +522,26 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn shared_index_geojson_documents() -> &'static Index<'static, Shared> {
|
||||
static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
|
||||
INDEX
|
||||
.get_or_init(|| async {
|
||||
// Retrieved from https://gitlab-forge.din.developpement-durable.gouv.fr/pub/geomatique/descartes/d-map/-/blob/main/demo/examples/commons/countries.geojson?ref_type=heads
|
||||
let server = Server::new_shared();
|
||||
let index = server._index("SHARED_GEOJSON_DOCUMENTS").to_shared();
|
||||
let countries = include_str!("../documents/geojson/assets/countries.json");
|
||||
let lille = serde_json::from_str::<serde_json::Value>(countries).unwrap();
|
||||
let (response, _code) = index._add_documents(Value(lille), Some("name"), None).await;
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (response, _code) =
|
||||
index._update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
index
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn shared_index_for_fragments() -> Index<'static, Shared> {
|
||||
static INDEX: OnceCell<(Server<Shared>, String)> = OnceCell::const_new();
|
||||
let (server, uid) = INDEX
|
||||
|
||||
@@ -49,8 +49,8 @@ impl Server<Owned> {
|
||||
}
|
||||
|
||||
let options = default_settings(dir.path());
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Server { service, _dir: Some(dir), _marker: PhantomData }
|
||||
@@ -65,7 +65,9 @@ impl Server<Owned> {
|
||||
|
||||
options.master_key = Some("MASTER_KEY".to_string());
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Server { service, _dir: Some(dir), _marker: PhantomData }
|
||||
@@ -78,7 +80,9 @@ impl Server<Owned> {
|
||||
}
|
||||
|
||||
pub async fn new_with_options(options: Opt) -> Result<Self, anyhow::Error> {
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options)?;
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, handle)?;
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Ok(Server { service, _dir: None, _marker: PhantomData })
|
||||
@@ -217,8 +221,9 @@ impl Server<Shared> {
|
||||
}
|
||||
|
||||
let options = default_settings(dir.path());
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
|
||||
let service = Service { index_scheduler, auth, api_key: None, options };
|
||||
|
||||
Server { service, _dir: Some(dir), _marker: PhantomData }
|
||||
@@ -390,6 +395,17 @@ impl<State> Server<State> {
|
||||
self.service.post("/multi-search", queries).await
|
||||
}
|
||||
|
||||
pub async fn multi_search_with_headers(
|
||||
&self,
|
||||
queries: Value,
|
||||
headers: Vec<(&str, &str)>,
|
||||
) -> (Value, StatusCode) {
|
||||
let body = serde_json::to_string(&queries).unwrap();
|
||||
let mut all_headers = vec![("content-type", "application/json")];
|
||||
all_headers.extend(headers);
|
||||
self.service.post_str("/multi-search", body, all_headers).await
|
||||
}
|
||||
|
||||
pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
|
||||
self.service.get(format!("/indexes{parameters}")).await
|
||||
}
|
||||
@@ -490,6 +506,8 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
max_indexing_threads: MaxThreads::from_str("2").unwrap(),
|
||||
experimental_no_edition_2024_for_settings: false,
|
||||
experimental_no_edition_2024_for_dumps: false,
|
||||
experimental_no_edition_2024_for_prefix_post_processing: false,
|
||||
experimental_no_edition_2024_for_facet_post_processing: false,
|
||||
},
|
||||
experimental_enable_metrics: false,
|
||||
..Parser::parse_from(None as Option<&str>)
|
||||
|
||||
@@ -10,8 +10,9 @@ use actix_web::test::TestRequest;
|
||||
use actix_web::web::Data;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch::analytics::Analytics;
|
||||
use meilisearch::personalization::PersonalizationService;
|
||||
use meilisearch::search_queue::SearchQueue;
|
||||
use meilisearch::{create_app, Opt, SubscriberForSecondLayer};
|
||||
use meilisearch::{create_app, Opt, ServicesData, SubscriberForSecondLayer};
|
||||
use meilisearch_auth::AuthController;
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing_subscriber::Layer;
|
||||
@@ -135,14 +136,24 @@ impl Service {
|
||||
self.options.experimental_search_queue_size,
|
||||
NonZeroUsize::new(1).unwrap(),
|
||||
);
|
||||
let personalization_service = self
|
||||
.options
|
||||
.experimental_personalization_api_key
|
||||
.clone()
|
||||
.map(PersonalizationService::cohere)
|
||||
.unwrap_or_else(PersonalizationService::disabled);
|
||||
|
||||
actix_web::test::init_service(create_app(
|
||||
self.index_scheduler.clone().into(),
|
||||
self.auth.clone().into(),
|
||||
Data::new(search_queue),
|
||||
ServicesData {
|
||||
index_scheduler: self.index_scheduler.clone().into(),
|
||||
auth: self.auth.clone().into(),
|
||||
search_queue: Data::new(search_queue),
|
||||
personalization_service: Data::new(personalization_service),
|
||||
logs_route_handle: Data::new(route_layer_handle),
|
||||
logs_stderr_handle: Data::new(stderr_layer_handle),
|
||||
analytics: Data::new(Analytics::no_analytics()),
|
||||
},
|
||||
self.options.clone(),
|
||||
(route_layer_handle, stderr_layer_handle),
|
||||
Data::new(Analytics::no_analytics()),
|
||||
true,
|
||||
))
|
||||
.await
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user