mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-27 16:30:31 +00:00
Compare commits
277 Commits
prototype-
...
introduce-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d14790824d | ||
|
|
c01d26ffd7 | ||
|
|
225af069a9 | ||
|
|
afc6c10a2a | ||
|
|
4e819a6187 | ||
|
|
f2185438ee | ||
|
|
ca1ad51564 | ||
|
|
a1d1e7c82a | ||
|
|
56438bdea4 | ||
|
|
33b67b82e1 | ||
|
|
7b4f2aa593 | ||
|
|
1fb96d3edb | ||
|
|
e93a5719ef | ||
|
|
d34f0b606c | ||
|
|
acc400face | ||
|
|
aa2327591e | ||
|
|
a6f9e0ddf0 | ||
|
|
60470bb647 | ||
|
|
294e1ba16d | ||
|
|
8e6893ddbe | ||
|
|
d018346f18 | ||
|
|
de98656ed1 | ||
|
|
da7469be38 | ||
|
|
df9d10ac44 | ||
|
|
528d9d6d8b | ||
|
|
4fb5c39b92 | ||
|
|
022205af90 | ||
|
|
50280bf02b | ||
|
|
9b579069df | ||
|
|
f5a4a1c8b2 | ||
|
|
5ab4cdb1f3 | ||
|
|
1f54f07f72 | ||
|
|
73d8a4eace | ||
|
|
c1e5897076 | ||
|
|
718a98fbbf | ||
|
|
86bf231d29 | ||
|
|
182c3f4b80 | ||
|
|
c1eba66443 | ||
|
|
7197ced673 | ||
|
|
4f21ee6c66 | ||
|
|
787472453d | ||
|
|
8f65f35de9 | ||
|
|
c27c923439 | ||
|
|
fd5649091d | ||
|
|
9a57736773 | ||
|
|
7740997ea8 | ||
|
|
7eb23f73ba | ||
|
|
b9e9fc376a | ||
|
|
27bf2f1298 | ||
|
|
d4d82fbd0c | ||
|
|
eda09a54da | ||
|
|
b132d70413 | ||
|
|
e41ebd3047 | ||
|
|
705d31e8bd | ||
|
|
7d95950ce6 | ||
|
|
c6b4c21c23 | ||
|
|
bf96fdb858 | ||
|
|
41eeffd88d | ||
|
|
1eb9fe8562 | ||
|
|
bac7a1623a | ||
|
|
5458850d21 | ||
|
|
20ac59c946 | ||
|
|
cfc1e193b6 | ||
|
|
0cc25c7e4c | ||
|
|
102681e384 | ||
|
|
3ef7a478cd | ||
|
|
e70ac35e02 | ||
|
|
d3654906bf | ||
|
|
b15de68831 | ||
|
|
6723700fb9 | ||
|
|
2c099b7c23 | ||
|
|
50fca8fc70 | ||
|
|
b9d92c481b | ||
|
|
d142c5e432 | ||
|
|
4d4683adb6 | ||
|
|
d6063079af | ||
|
|
2e04ab4737 | ||
|
|
d95384a636 | ||
|
|
c0690f5b9e | ||
|
|
909d84447d | ||
|
|
2cf57d584e | ||
|
|
59242b9c4f | ||
|
|
6a6212d4e1 | ||
|
|
a8006a3750 | ||
|
|
0e0e462f5b | ||
|
|
805531c90d | ||
|
|
a6470a0c37 | ||
|
|
8a54f14b8e | ||
|
|
be5e521cb0 | ||
|
|
60f20119a2 | ||
|
|
2f257fdc3d | ||
|
|
0991cb0de4 | ||
|
|
4709c638ed | ||
|
|
0776217801 | ||
|
|
9eae36ce3e | ||
|
|
3f501c9b85 | ||
|
|
c85146524b | ||
|
|
79d192fb3f | ||
|
|
a4ed36f0cc | ||
|
|
dddb51a9ca | ||
|
|
8f006eeaf3 | ||
|
|
445e5aff02 | ||
|
|
234d0c360f | ||
|
|
cd181b36c3 | ||
|
|
4cfe0dbdd8 | ||
|
|
89a4ac92eb | ||
|
|
deb90ff573 | ||
|
|
0c10063a87 | ||
|
|
87ea080c10 | ||
|
|
6d62fa061b | ||
|
|
de6cd3ac01 | ||
|
|
cb8f033130 | ||
|
|
03097e65e8 | ||
|
|
c32bec338f | ||
|
|
73d3d286d9 | ||
|
|
29eeb84ce3 | ||
|
|
d78951feb7 | ||
|
|
63c8cbae5b | ||
|
|
72ded27e98 | ||
|
|
c25781f720 | ||
|
|
c3b18fede9 | ||
|
|
4070895a21 | ||
|
|
a21711f473 | ||
|
|
f0ec8cbffe | ||
|
|
e568dbbabb | ||
|
|
8ff15b3dfb | ||
|
|
247eaed872 | ||
|
|
8b1fcfd7f8 | ||
|
|
45f289488d | ||
|
|
b0ef7701ae | ||
|
|
c9fb6c48b8 | ||
|
|
0de34aa8fa | ||
|
|
6bfcad4b05 | ||
|
|
67a0c9fff8 | ||
|
|
cc4aca78c4 | ||
|
|
5c7fa9b924 | ||
|
|
9837de271d | ||
|
|
fd251c37bb | ||
|
|
adb6bca950 | ||
|
|
42854c0bca | ||
|
|
d0bdff7b7b | ||
|
|
8650ee66c1 | ||
|
|
377fa09cb7 | ||
|
|
00a03742ff | ||
|
|
d11e359244 | ||
|
|
09d45439c7 | ||
|
|
5d92da0c73 | ||
|
|
677bb39e73 | ||
|
|
85ea77de0b | ||
|
|
03317be0bd | ||
|
|
4aa7c8f7b1 | ||
|
|
ce57a342a3 | ||
|
|
1cc6cd78e0 | ||
|
|
c204afdc79 | ||
|
|
c14967eeac | ||
|
|
f38db86120 | ||
|
|
50b155fa2d | ||
|
|
a533c8e041 | ||
|
|
e5595a05df | ||
|
|
908adee6fc | ||
|
|
7b3353252f | ||
|
|
647a10bf18 | ||
|
|
f2141a894a | ||
|
|
08c332980b | ||
|
|
7b57a44b5a | ||
|
|
fe2c0cc3d5 | ||
|
|
eecf4c53e7 | ||
|
|
cf4c3c287b | ||
|
|
71e5605daa | ||
|
|
890a5c64dd | ||
|
|
0ee4671a91 | ||
|
|
68333424c6 | ||
|
|
d4529d8c83 | ||
|
|
5e8144b0e1 | ||
|
|
3e3695445f | ||
|
|
091f989b72 | ||
|
|
dd28a3fd5a | ||
|
|
6f24b438e0 | ||
|
|
48a9ad4c17 | ||
|
|
b997039a91 | ||
|
|
0e6b6bd130 | ||
|
|
b1b0b0b67c | ||
|
|
27155f845c | ||
|
|
c6f14279d7 | ||
|
|
fa15356209 | ||
|
|
99f5e09a79 | ||
|
|
a8ef6f08e0 | ||
|
|
ae5a04e85c | ||
|
|
8ebfc9fa92 | ||
|
|
21026f0ca8 | ||
|
|
e579554c84 | ||
|
|
ff49250c1a | ||
|
|
8b95c6ae56 | ||
|
|
28162759a4 | ||
|
|
dd128656cb | ||
|
|
4456df5a46 | ||
|
|
0b104b3efa | ||
|
|
ac944f0960 | ||
|
|
5f55e88484 | ||
|
|
aab6ffec30 | ||
|
|
1dd33af8a3 | ||
|
|
8a2a1e4d27 | ||
|
|
e2686c0fce | ||
|
|
9473a2a6ca | ||
|
|
11ce3b9636 | ||
|
|
0bf4157a75 | ||
|
|
4eaa626bca | ||
|
|
668b26b641 | ||
|
|
04e4586fb3 | ||
|
|
78f6f22a80 | ||
|
|
13afdaf393 | ||
|
|
742d0ee531 | ||
|
|
4275833bab | ||
|
|
de7f8c4406 | ||
|
|
f00a285a6d | ||
|
|
43bb02e7b4 | ||
|
|
56fd4ee9bd | ||
|
|
0625d08e4e | ||
|
|
9269086fda | ||
|
|
98e3ecb86b | ||
|
|
9af9e73c45 | ||
|
|
4b107b17cb | ||
|
|
cb82b0798a | ||
|
|
7f1071943e | ||
|
|
3c1e7c7428 | ||
|
|
baeefa4817 | ||
|
|
e8ba7833ec | ||
|
|
db676aee73 | ||
|
|
1a0d8810e5 | ||
|
|
4615d86748 | ||
|
|
525e67ba93 | ||
|
|
44eb153619 | ||
|
|
195785c47f | ||
|
|
4eae92f411 | ||
|
|
d7cb319217 | ||
|
|
15062e7dba | ||
|
|
bf19f86e38 | ||
|
|
91c7ef8723 | ||
|
|
fc23a0ee52 | ||
|
|
d3491851bc | ||
|
|
886404cc4d | ||
|
|
75a7f0e26c | ||
|
|
47827ca5c1 | ||
|
|
f75d74a967 | ||
|
|
42648919c7 | ||
|
|
6987cac1ba | ||
|
|
082237863e | ||
|
|
4bcdd7a9f9 | ||
|
|
fc4b7ccb70 | ||
|
|
df9ac07922 | ||
|
|
ba27a09efe | ||
|
|
bc51d3a918 | ||
|
|
b39d4e9b50 | ||
|
|
b18cd9075d | ||
|
|
36b897858a | ||
|
|
a7b2f461cf | ||
|
|
fce132a21b | ||
|
|
9c857ff48f | ||
|
|
f27b33dabe | ||
|
|
9eb4b84abd | ||
|
|
71834787ec | ||
|
|
b004db37c7 | ||
|
|
0c04cd1d9f | ||
|
|
63ea405b3e | ||
|
|
1a01196a80 | ||
|
|
f4ff722247 | ||
|
|
262b429a4c | ||
|
|
0a0a5f84bf | ||
|
|
c06f386ac3 | ||
|
|
2a04ecccc4 | ||
|
|
34254b42b6 | ||
|
|
54e34beac6 | ||
|
|
c0aa018c87 | ||
|
|
b21d7aedf9 | ||
|
|
2f1a9105b9 | ||
|
|
27bb591331 | ||
|
|
94a1f5a8ea |
22
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
22
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
@@ -22,6 +22,28 @@ Related product discussion:
|
||||
|
||||
<!---If necessary, create a list with technical/product steps-->
|
||||
|
||||
### Reminders when modifying the API
|
||||
|
||||
- [ ] Update the openAPI file with utoipa:
|
||||
- [ ] If a new module has been introduced, create a new structure deriving [the OpenAPI proc-macro](https://docs.rs/utoipa/latest/utoipa/derive.OpenApi.html) and nest it in the main [openAPI structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L64-L78).
|
||||
- [ ] If a new route has been introduced, add the [path decorator](https://docs.rs/utoipa/latest/utoipa/attr.path.html) to it and add the route at the top of the file in its openAPI structure.
|
||||
- [ ] If a structure which is deserialized or serialized in the API has been introduced or modified, it must derive the [`schema`](https://docs.rs/utoipa/latest/utoipa/macro.schema.html) or the [`IntoParams`](https://docs.rs/utoipa/latest/utoipa/derive.IntoParams.html) proc-macro.
|
||||
If it's a **new** structure you must also add it to the big list of structures [in the main `OpenApi` structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L88).
|
||||
- [ ] Once everything is done, start Meilisearch with the swagger flag: `cargo run --features swagger`, open `http://localhost:7700/scalar` on your browser, and ensure everything works as expected.
|
||||
- For more info, refer to [this presentation](https://pitch.com/v/generating-the-openapi-file-jrn3nh).
|
||||
|
||||
### Reminders when modifying a database
|
||||
|
||||
- [ ] If modifying the API keys => Reach out to the team we've not planned anything for that yet
|
||||
- [ ] If modifying a milli index =>
|
||||
- [ ] You can add the upgrade code to go from the previous to the latest version [here](https://github.com/meilisearch/meilisearch/blob/main/crates/milli/src/update/upgrade/mod.rs)
|
||||
- [ ] Don't forget to return `true` to update the stats if needed
|
||||
- [ ] Did the read read part requires a change? (gettings documents, search, facet search etc)
|
||||
The index must **always** be able to read the old version of the database. Make everything with that in mind
|
||||
and if the old and new database are not compatible you may need to match on the version of the database and keep both the old and new code. (The version of the index is stored in the main database under the `VERSION_KEY` key)
|
||||
- [ ] If modifying the task queue or the index-mapper =>
|
||||
- [ ] Update
|
||||
|
||||
### Reminders when modifying the Setting API
|
||||
|
||||
<!--- Special steps to remind when adding a new index setting -->
|
||||
|
||||
2
.github/workflows/bench-manual.yml
vendored
2
.github/workflows/bench-manual.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
4
.github/workflows/bench-pr.yml
vendored
4
.github/workflows/bench-pr.yml
vendored
@@ -55,7 +55,7 @@ jobs:
|
||||
reaction-type: "rocket"
|
||||
repo-token: ${{ env.GH_TOKEN }}
|
||||
|
||||
- uses: xt0rted/pull-request-comment-branch@v2
|
||||
- uses: xt0rted/pull-request-comment-branch@v3
|
||||
id: comment-branch
|
||||
with:
|
||||
repo_token: ${{ env.GH_TOKEN }}
|
||||
@@ -66,7 +66,7 @@ jobs:
|
||||
fetch-depth: 0 # fetch full history to be able to get main commit sha
|
||||
ref: ${{ steps.comment-branch.outputs.head_ref }}
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/bench-push-indexing.yml
vendored
2
.github/workflows/bench-push-indexing.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/benchmarks-manual.yml
vendored
2
.github/workflows/benchmarks-manual.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
4
.github/workflows/benchmarks-pr.yml
vendored
4
.github/workflows/benchmarks-pr.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
reaction-type: "eyes"
|
||||
repo-token: ${{ env.GH_TOKEN }}
|
||||
|
||||
- uses: xt0rted/pull-request-comment-branch@v2
|
||||
- uses: xt0rted/pull-request-comment-branch@v3
|
||||
id: comment-branch
|
||||
with:
|
||||
repo_token: ${{ env.GH_TOKEN }}
|
||||
|
||||
@@ -16,7 +16,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/flaky-tests.yml
vendored
2
.github/workflows/flaky-tests.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Install cargo-flaky
|
||||
run: cargo install cargo-flaky
|
||||
- name: Run cargo flaky in the dumps
|
||||
|
||||
2
.github/workflows/fuzzer-indexing.yml
vendored
2
.github/workflows/fuzzer-indexing.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Install cargo-deb
|
||||
run: cargo install cargo-deb
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
8
.github/workflows/publish-binaries.yml
vendored
8
.github/workflows/publish-binaries.yml
vendored
@@ -45,7 +45,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
asset_name: meilisearch-windows-amd64.exe
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
@@ -101,7 +101,7 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
@@ -148,7 +148,7 @@ jobs:
|
||||
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
|
||||
apt-get update -y && apt-get install -y docker-ce
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
|
||||
62
.github/workflows/test-suite.yml
vendored
62
.github/workflows/test-suite.yml
vendored
@@ -4,7 +4,7 @@ on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Everyday at 5:00am
|
||||
- cron: '0 5 * * *'
|
||||
- cron: "0 5 * * *"
|
||||
pull_request:
|
||||
push:
|
||||
# trying and staging branches are for Bors config
|
||||
@@ -31,9 +31,9 @@ jobs:
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Setup test with Rust stable
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -55,8 +55,8 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -81,13 +81,45 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Run cargo build with almost all features
|
||||
run: |
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
- name: Run cargo test with almost all features
|
||||
run: |
|
||||
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
|
||||
ollama-ubuntu:
|
||||
name: Test with Ollama
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Install Ollama
|
||||
run: |
|
||||
curl -fsSL https://ollama.com/install.sh | sudo -E sh
|
||||
- name: Start serving
|
||||
run: |
|
||||
# Run it in the background, there is no way to daemonise at the moment
|
||||
ollama serve &
|
||||
|
||||
# A short pause is required before the HTTP port is opened
|
||||
sleep 5
|
||||
|
||||
# This endpoint blocks until ready
|
||||
time curl -i http://localhost:11434
|
||||
|
||||
- name: Pull nomic-embed-text & all-minilm
|
||||
run: |
|
||||
ollama pull nomic-embed-text
|
||||
ollama pull all-minilm
|
||||
|
||||
- name: Run cargo test
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all --features test-ollama ollama
|
||||
|
||||
test-disabled-tokenization:
|
||||
name: Test disabled tokenization
|
||||
@@ -101,7 +133,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Run cargo tree without default features and check lindera is not present
|
||||
run: |
|
||||
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
|
||||
@@ -125,9 +157,9 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -139,12 +171,12 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -156,14 +188,14 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: nightly-2024-07-09
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
||||
@@ -18,7 +18,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
- name: Install sd
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,6 +10,7 @@
|
||||
/dumps
|
||||
/bench
|
||||
/_xtask_benchmark.ms
|
||||
/benchmarks
|
||||
|
||||
# Snapshots
|
||||
## ... large
|
||||
|
||||
@@ -48,6 +48,27 @@ cargo xtask bench --no-dashboard -- workloads/my_workload_1.json workloads/my_wo
|
||||
|
||||
For processing the results, look at [Looking at benchmark results/Without dashboard](#without-dashboard).
|
||||
|
||||
#### Sending a workload by hand
|
||||
|
||||
Sometimes you want to visualize the metrics of a worlkoad that comes from a custom report.
|
||||
It is not quite easy to trick the benchboard in thinking that your report is legitimate but here are the commands you can run to upload your firefox report on a running benchboard.
|
||||
|
||||
```bash
|
||||
# Name this hostname whatever you want
|
||||
echo '{ "hostname": "the-best-place" }' | xh PUT 'http://127.0.0.1:9001/api/v1/machine'
|
||||
|
||||
# You'll receive an UUID from this command that we will call $invocation_uuid
|
||||
echo '{ "commit": { "sha1": "1234567", "commit_date": "2024-09-05 12:00:12.0 +00:00:00", "message": "A cool message" }, "machine_hostname": "the-best-place", "max_workloads": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/invocation'
|
||||
|
||||
# Just use UUID from the previous command
|
||||
# and you'll receive another UUID that we will call $workload_uuid
|
||||
echo '{ "invocation_uuid": "$invocation_uuid", "name": "toto", "max_runs": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/workload'
|
||||
|
||||
# And now use your $workload_uuid and the content of your firefox report
|
||||
# but don't forget to convert your firefox report from JSONLines into an object
|
||||
echo '{ "workload_uuid": "$workload_uuid", "data": $REPORT_JSON_DATA }' | xh PUT 'http://127.0.0.1:9001/api/v1/run'
|
||||
```
|
||||
|
||||
### In CI
|
||||
|
||||
We have dedicated runners to run workloads on CI. Currently, there are three ways of running the CI:
|
||||
|
||||
1235
Cargo.lock
generated
1235
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -22,7 +22,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.12.0"
|
||||
version = "1.13.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Compile
|
||||
FROM rust:1.79.0-alpine3.20 AS compiler
|
||||
FROM rust:1.81.0-alpine3.20 AS compiler
|
||||
|
||||
RUN apk add -q --no-cache build-base openssl-dev
|
||||
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019-2024 Meili SAS
|
||||
Copyright (c) 2019-2025 Meili SAS
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
@@ -58,6 +58,7 @@ See the list of all our example apps in our [demos repository](https://github.co
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
|
||||
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **AI-ready:** works out of the box with [langchain](https://www.meilisearch.com/with/langchain) and the [model context protocol](https://github.com/meilisearch/meilisearch-mcp)
|
||||
- **Easy to install, deploy, and maintain**
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
@@ -1403,6 +1403,104 @@
|
||||
"title": "Number of tasks by indexes",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 15,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 51
|
||||
},
|
||||
"id": 29,
|
||||
"interval": "5s",
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.1.4",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "meilisearch_task_queue_latency_seconds{instance=\"$instance\", job=\"$job\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "{{value}} ",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Task queue latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"datasource": {
|
||||
|
||||
@@ -11,27 +11,27 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
anyhow = "1.0.95"
|
||||
bumpalo = "3.16.0"
|
||||
csv = "1.3.0"
|
||||
csv = "1.3.1"
|
||||
memmap2 = "0.9.5"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.43", default-features = false }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tempfile = "3.14.0"
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.10.7"
|
||||
roaring = "0.10.10"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.86"
|
||||
bytes = "1.6.0"
|
||||
anyhow = "1.0.95"
|
||||
bytes = "1.9.0"
|
||||
convert_case = "0.6.0"
|
||||
flate2 = "1.0.30"
|
||||
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
flate2 = "1.0.35"
|
||||
reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/all-tokenizations"]
|
||||
|
||||
@@ -10,7 +10,7 @@ use milli::documents::PrimaryKey;
|
||||
use milli::heed::{EnvOpenOptions, RwTxn};
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||
use milli::update::{IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::Index;
|
||||
use rand::seq::SliceRandom;
|
||||
@@ -38,7 +38,7 @@ fn setup_index() -> Index {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(100);
|
||||
Index::new(options, path).unwrap()
|
||||
Index::new(options, path, true).unwrap()
|
||||
}
|
||||
|
||||
fn setup_settings<'t>(
|
||||
@@ -138,10 +138,9 @@ fn indexing_songs_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -205,10 +204,9 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -250,10 +248,9 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -319,10 +316,9 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -396,10 +392,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -441,10 +436,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -482,10 +476,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -549,11 +542,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -617,10 +609,9 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -684,10 +675,9 @@ fn indexing_wiki(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -750,10 +740,9 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -795,10 +784,9 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -863,10 +851,9 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -939,11 +926,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents =
|
||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -985,11 +971,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents =
|
||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1027,11 +1012,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents =
|
||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1095,10 +1079,9 @@ fn indexing_movies_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1161,10 +1144,9 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1206,10 +1188,9 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1274,10 +1255,9 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1387,10 +1367,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1432,10 +1411,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1473,10 +1451,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1563,10 +1540,9 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1654,10 +1630,9 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1737,10 +1712,9 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1804,10 +1778,9 @@ fn indexing_geo(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1870,10 +1843,9 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1915,10 +1887,9 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1983,10 +1954,9 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
|
||||
@@ -12,7 +12,7 @@ use memmap2::Mmap;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||
use milli::update::{IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
|
||||
use serde_json::Value;
|
||||
@@ -68,7 +68,7 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(100);
|
||||
let index = Index::new(options, conf.database_name).unwrap();
|
||||
let index = Index::new(options, conf.database_name, true).unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
@@ -99,8 +99,8 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let documents = documents_from(conf.dataset, conf.dataset_format);
|
||||
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
|
||||
@@ -11,8 +11,8 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
time = { version = "0.3.36", features = ["parsing"] }
|
||||
time = { version = "0.3.37", features = ["parsing"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.86"
|
||||
vergen-git2 = "1.0.0"
|
||||
anyhow = "1.0.95"
|
||||
vergen-git2 = "1.0.2"
|
||||
|
||||
@@ -11,21 +11,21 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
flate2 = "1.0.30"
|
||||
http = "1.1.0"
|
||||
anyhow = "1.0.95"
|
||||
flate2 = "1.0.35"
|
||||
http = "1.2.0"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.19.0"
|
||||
regex = "1.10.5"
|
||||
roaring = { version = "0.10.7", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tar = "0.4.41"
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
once_cell = "1.20.2"
|
||||
regex = "1.11.1"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -141,6 +141,9 @@ pub enum KindDump {
|
||||
instance_uid: Option<InstanceUid>,
|
||||
},
|
||||
SnapshotCreation,
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
}
|
||||
|
||||
impl From<Task> for TaskDump {
|
||||
@@ -210,6 +213,9 @@ impl From<KindWithContent> for KindDump {
|
||||
KindDump::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
|
||||
KindWithContent::UpgradeDatabase { from: version } => {
|
||||
KindDump::UpgradeDatabase { from: version }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -458,7 +464,7 @@ pub(crate) mod test {
|
||||
}
|
||||
|
||||
fn create_test_features() -> RuntimeTogglableFeatures {
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
RuntimeTogglableFeatures::default()
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -327,10 +327,7 @@ pub(crate) mod test {
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
dump.features().unwrap().unwrap(),
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
);
|
||||
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -373,10 +370,7 @@ pub(crate) mod test {
|
||||
|
||||
assert_eq!(test.documents().unwrap().count(), 1);
|
||||
|
||||
assert_eq!(
|
||||
dump.features().unwrap().unwrap(),
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
);
|
||||
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
|
||||
@@ -17,4 +17,5 @@ nom_locate = "4.2.0"
|
||||
unescaper = "0.1.5"
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.39.0"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = "=1.39.0"
|
||||
|
||||
@@ -179,6 +179,26 @@ impl<'a> FilterCondition<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token> + '_> {
|
||||
if depth == 0 {
|
||||
return Box::new(std::iter::empty());
|
||||
}
|
||||
match self {
|
||||
FilterCondition::Condition { fid, .. } | FilterCondition::In { fid, .. } => {
|
||||
Box::new(std::iter::once(fid))
|
||||
}
|
||||
FilterCondition::Not(filter) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
filter.fids(depth)
|
||||
}
|
||||
FilterCondition::And(subfilters) | FilterCondition::Or(subfilters) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
Box::new(subfilters.iter().flat_map(move |f| f.fids(depth)))
|
||||
}
|
||||
_ => Box::new(std::iter::empty()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the first token found at the specified depth, `None` if no token at this depth.
|
||||
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
|
||||
match self {
|
||||
@@ -978,6 +998,43 @@ pub mod tests {
|
||||
assert!(filter.token_at_depth(3).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fids() {
|
||||
let filter = Fc::parse("field = value").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field IN [1, 2, 3]").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field != value").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field1 = value1 AND field2 = value2").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 2);
|
||||
assert!(fids[0].value() == "field1");
|
||||
assert!(fids[1].value() == "field2");
|
||||
|
||||
let filter = Fc::parse("field1 = value1 OR field2 = value2").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 2);
|
||||
assert!(fids[0].value() == "field1");
|
||||
assert!(fids[1].value() == "field2");
|
||||
|
||||
let depth = 2;
|
||||
let filter =
|
||||
Fc::parse("field1 = value1 AND (field2 = value2 OR field3 = value3)").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(depth).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_from_str() {
|
||||
let s = "test string that should not be parsed";
|
||||
|
||||
@@ -11,12 +11,12 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.3.2", features = ["derive"] }
|
||||
arbitrary = { version = "1.4.1", features = ["derive"] }
|
||||
bumpalo = "3.16.0"
|
||||
clap = { version = "4.5.9", features = ["derive"] }
|
||||
clap = { version = "4.5.24", features = ["derive"] }
|
||||
either = "1.13.0"
|
||||
fastrand = "2.1.0"
|
||||
fastrand = "2.3.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tempfile = "3.10.1"
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
|
||||
@@ -12,7 +12,7 @@ use milli::documents::mmap_from_objects;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig};
|
||||
use milli::update::IndexerConfig;
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::Index;
|
||||
use serde_json::Value;
|
||||
@@ -63,7 +63,7 @@ fn main() {
|
||||
Some(path) => TempDir::new_in(path).unwrap(),
|
||||
None => TempDir::new().unwrap(),
|
||||
};
|
||||
let index = Index::new(options, tempdir.path()).unwrap();
|
||||
let index = Index::new(options, tempdir.path(), true).unwrap();
|
||||
let indexer_config = IndexerConfig::default();
|
||||
|
||||
std::thread::scope(|s| {
|
||||
@@ -89,9 +89,7 @@ fn main() {
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let mut indexer = indexer::DocumentOperation::new(
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
|
||||
let mut operations = Vec::new();
|
||||
for op in batch.0 {
|
||||
@@ -115,7 +113,7 @@ fn main() {
|
||||
for op in &operations {
|
||||
match op {
|
||||
Either::Left(documents) => {
|
||||
indexer.add_documents(documents).unwrap()
|
||||
indexer.replace_documents(documents).unwrap()
|
||||
}
|
||||
Either::Right(ids) => indexer.delete_documents(ids),
|
||||
}
|
||||
|
||||
@@ -11,42 +11,43 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
anyhow = "1.0.95"
|
||||
bincode = "1.3.3"
|
||||
bumpalo = "3.16.0"
|
||||
bumparaw-collections = "0.1.2"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.3.0"
|
||||
derive_builder = "0.20.0"
|
||||
csv = "1.3.1"
|
||||
derive_builder = "0.20.2"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "2.1.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.30"
|
||||
flate2 = "1.0.35"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.4"
|
||||
memmap2 = "0.9.5"
|
||||
page_size = "0.6.0"
|
||||
rayon = "1.10.0"
|
||||
roaring = { version = "0.10.7", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
time = { version = "0.3.36", features = [
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tracing = "0.1.40"
|
||||
ureq = "2.10.0"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
arroy = "0.5.0"
|
||||
big_s = "1.0.2"
|
||||
crossbeam-channel = "0.5.13"
|
||||
insta = { version = "1.39.0", features = ["json", "redactions"] }
|
||||
crossbeam-channel = "0.5.14"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["json", "redactions"] }
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
|
||||
@@ -1,901 +0,0 @@
|
||||
/*!
|
||||
The autobatcher is responsible for combining the next enqueued
|
||||
tasks affecting a single index into a [batch](crate::batch::Batch).
|
||||
|
||||
The main function of the autobatcher is [`next_autobatch`].
|
||||
*/
|
||||
|
||||
use std::ops::ControlFlow::{self, Break, Continue};
|
||||
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::{
|
||||
self, ReplaceDocuments, UpdateDocuments,
|
||||
};
|
||||
use meilisearch_types::tasks::TaskId;
|
||||
|
||||
use crate::KindWithContent;
|
||||
|
||||
/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind)
|
||||
/// for the purpose of simplifying the implementation of the autobatcher.
|
||||
///
|
||||
/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`]
|
||||
enum AutobatchKind {
|
||||
DocumentImport {
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
DocumentEdition,
|
||||
DocumentDeletion {
|
||||
by_filter: bool,
|
||||
},
|
||||
DocumentClear,
|
||||
Settings {
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
IndexCreation,
|
||||
IndexDeletion,
|
||||
IndexUpdate,
|
||||
IndexSwap,
|
||||
}
|
||||
|
||||
impl AutobatchKind {
|
||||
#[rustfmt::skip]
|
||||
fn allow_index_creation(&self) -> Option<bool> {
|
||||
match self {
|
||||
AutobatchKind::DocumentImport { allow_index_creation, .. }
|
||||
| AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
AutobatchKind::DocumentImport { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<KindWithContent> for AutobatchKind {
|
||||
fn from(kind: KindWithContent) -> Self {
|
||||
match kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
..
|
||||
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
|
||||
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
|
||||
KindWithContent::DocumentDeletion { .. } => {
|
||||
AutobatchKind::DocumentDeletion { by_filter: false }
|
||||
}
|
||||
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
|
||||
KindWithContent::DocumentDeletionByFilter { .. } => {
|
||||
AutobatchKind::DocumentDeletion { by_filter: true }
|
||||
}
|
||||
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
|
||||
AutobatchKind::Settings {
|
||||
allow_index_creation: allow_index_creation && !is_deletion,
|
||||
}
|
||||
}
|
||||
KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion,
|
||||
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
|
||||
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
|
||||
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BatchKind {
|
||||
DocumentClear {
|
||||
ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentOperation {
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<String>,
|
||||
operation_ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentEdition {
|
||||
id: TaskId,
|
||||
},
|
||||
DocumentDeletion {
|
||||
deletion_ids: Vec<TaskId>,
|
||||
includes_by_filter: bool,
|
||||
},
|
||||
ClearAndSettings {
|
||||
other: Vec<TaskId>,
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
Settings {
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
IndexDeletion {
|
||||
ids: Vec<TaskId>,
|
||||
},
|
||||
IndexCreation {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexUpdate {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexSwap {
|
||||
id: TaskId,
|
||||
},
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
#[rustfmt::skip]
|
||||
fn allow_index_creation(&self) -> Option<bool> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { allow_index_creation, .. }
|
||||
| BatchKind::ClearAndSettings { allow_index_creation, .. }
|
||||
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
/// Returns a `ControlFlow::Break` if you must stop right now.
|
||||
/// The boolean tell you if an index has been created by the batched task.
|
||||
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
|
||||
/// but false can't be returned if you needs to create an index.
|
||||
// TODO use an AutoBatchKind as input
|
||||
pub fn new(
|
||||
task_id: TaskId,
|
||||
kind: KindWithContent,
|
||||
primary_key: Option<&str>,
|
||||
) -> (ControlFlow<BatchKind, BatchKind>, bool) {
|
||||
use AutobatchKind as K;
|
||||
|
||||
match AutobatchKind::from(kind) {
|
||||
K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true),
|
||||
K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false),
|
||||
K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false),
|
||||
K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false),
|
||||
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
|
||||
K::DocumentImport { method, allow_index_creation, primary_key: pk }
|
||||
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
|
||||
{
|
||||
(
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key: pk,
|
||||
operation_ids: vec![task_id],
|
||||
}),
|
||||
allow_index_creation,
|
||||
)
|
||||
}
|
||||
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
|
||||
K::DocumentImport { method, allow_index_creation, primary_key } => (
|
||||
Break(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: vec![task_id],
|
||||
}),
|
||||
allow_index_creation,
|
||||
),
|
||||
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
|
||||
K::DocumentDeletion { by_filter: includes_by_filter } => (
|
||||
Continue(BatchKind::DocumentDeletion {
|
||||
deletion_ids: vec![task_id],
|
||||
includes_by_filter,
|
||||
}),
|
||||
false,
|
||||
),
|
||||
K::Settings { allow_index_creation } => (
|
||||
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
|
||||
allow_index_creation,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a `ControlFlow::Break` if you must stop right now.
|
||||
/// The boolean tell you if an index has been created by the batched task.
|
||||
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
|
||||
/// but false can't be returned if you needs to create an index.
|
||||
#[rustfmt::skip]
|
||||
fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow<BatchKind, BatchKind> {
|
||||
use AutobatchKind as K;
|
||||
|
||||
match (self, kind) {
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this),
|
||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
|
||||
Break(this)
|
||||
},
|
||||
// NOTE: We need to negate the whole condition since we're checking if we need to break instead of continue.
|
||||
// I wrote it this way because it's easier to understand than the other way around.
|
||||
(this, kind) if !(
|
||||
// 1. If both task don't interact with primary key -> we can continue
|
||||
(this.primary_key().is_none() && kind.primary_key().is_none()) ||
|
||||
// 2. Else ->
|
||||
(
|
||||
// 2.1 If we already have a primary-key ->
|
||||
(
|
||||
primary_key.is_some() &&
|
||||
// 2.1.1 If the task we're trying to accumulate have a pk it must be equal to our primary key
|
||||
// 2.1.2 If the task don't have a primary-key -> we can continue
|
||||
kind.primary_key().map_or(true, |pk| pk == primary_key)
|
||||
) ||
|
||||
// 2.2 If we don't have a primary-key ->
|
||||
(
|
||||
// 2.2.1 If both the batch and the task have a primary key they should be equal
|
||||
// 2.2.2 If the batch is set to Some(None), the task should be too
|
||||
// 2.2.3 If the batch is set to None -> we can continue
|
||||
this.primary_key().zip(kind.primary_key()).map_or(true, |(this, kind)| this == kind)
|
||||
)
|
||||
)
|
||||
|
||||
) // closing the negation
|
||||
|
||||
=> {
|
||||
Break(this)
|
||||
},
|
||||
// The index deletion can batch with everything but must stop after
|
||||
(
|
||||
BatchKind::DocumentClear { mut ids }
|
||||
| BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ }
|
||||
| BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids }
|
||||
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
ids.append(&mut other);
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::DocumentClear { mut ids },
|
||||
K::DocumentClear | K::DocumentDeletion { by_filter: _ },
|
||||
) => {
|
||||
ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids })
|
||||
}
|
||||
(
|
||||
this @ BatchKind::DocumentClear { .. },
|
||||
K::DocumentImport { .. } | K::Settings { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: operation_ids })
|
||||
}
|
||||
|
||||
// we can autobatch the same kind of document additions / updates
|
||||
(
|
||||
BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids },
|
||||
K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method: ReplaceDocuments,
|
||||
allow_index_creation,
|
||||
operation_ids,
|
||||
primary_key: pk,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
|
||||
K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method: UpdateDocuments,
|
||||
allow_index_creation,
|
||||
primary_key: pk,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
|
||||
K::DocumentDeletion { by_filter: false },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
// We can't batch a document operation with a delete by filter
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::DocumentDeletion { by_filter: true },
|
||||
) => {
|
||||
Break(this)
|
||||
}
|
||||
// but we can't autobatch documents if it's not the same kind
|
||||
// this match branch MUST be AFTER the previous one
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::DocumentImport { .. },
|
||||
) => Break(this),
|
||||
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::Settings { .. },
|
||||
) => Break(this),
|
||||
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: deletion_ids })
|
||||
}
|
||||
// we can't autobatch the deletion and import if the document deletion contained a filter
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true },
|
||||
K::DocumentImport { .. }
|
||||
) => Break(this),
|
||||
// we can autobatch the deletion and import if the index already exists
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
|
||||
K::DocumentImport { method, allow_index_creation, primary_key }
|
||||
) if index_already_exists => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can autobatch the deletion and import if both can't create an index
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
|
||||
K::DocumentImport { method, allow_index_creation, primary_key }
|
||||
) if !allow_index_creation => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { .. },
|
||||
K::DocumentImport { .. }
|
||||
) => {
|
||||
Break(this)
|
||||
}
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter })
|
||||
}
|
||||
(this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this),
|
||||
|
||||
(
|
||||
BatchKind::Settings { settings_ids, allow_index_creation },
|
||||
K::DocumentClear,
|
||||
) => Continue(BatchKind::ClearAndSettings {
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
other: vec![id],
|
||||
}),
|
||||
(
|
||||
this @ BatchKind::Settings { .. },
|
||||
K::DocumentImport { .. } | K::DocumentDeletion { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::Settings { mut settings_ids, allow_index_creation },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::Settings {
|
||||
allow_index_creation,
|
||||
settings_ids,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
other.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this),
|
||||
(
|
||||
BatchKind::ClearAndSettings {
|
||||
mut other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
},
|
||||
K::DocumentDeletion { .. },
|
||||
) => {
|
||||
other.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::IndexCreation { .. }
|
||||
| BatchKind::IndexDeletion { .. }
|
||||
| BatchKind::IndexUpdate { .. }
|
||||
| BatchKind::IndexSwap { .. }
|
||||
| BatchKind::DocumentEdition { .. },
|
||||
_,
|
||||
) => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a batch from an ordered list of tasks.
|
||||
///
|
||||
/// ## Preconditions
|
||||
/// 1. The tasks must be enqueued and given in the order in which they were enqueued
|
||||
/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion)
|
||||
/// 3. The tasks must all be related to the same index
|
||||
///
|
||||
/// ## Return
|
||||
/// `None` if the list of tasks is empty. Otherwise, an [`AutoBatch`] that represents
|
||||
/// a subset of the given tasks.
|
||||
pub fn autobatch(
|
||||
enqueued: Vec<(TaskId, KindWithContent)>,
|
||||
index_already_exists: bool,
|
||||
primary_key: Option<&str>,
|
||||
) -> Option<(BatchKind, bool)> {
|
||||
let mut enqueued = enqueued.into_iter();
|
||||
let (id, kind) = enqueued.next()?;
|
||||
|
||||
// index_exist will keep track of if the index should exist at this point after the tasks we batched.
|
||||
let mut index_exist = index_already_exists;
|
||||
|
||||
let (mut acc, must_create_index) = match BatchKind::new(id, kind, primary_key) {
|
||||
(Continue(acc), create) => (acc, create),
|
||||
(Break(acc), create) => return Some((acc, create)),
|
||||
};
|
||||
|
||||
// if an index has been created in the previous step we can consider it as existing.
|
||||
index_exist |= must_create_index;
|
||||
|
||||
for (id, kind) in enqueued {
|
||||
acc = match acc.accumulate(id, kind.into(), index_exist, primary_key) {
|
||||
Continue(acc) => acc,
|
||||
Break(acc) => return Some((acc, must_create_index)),
|
||||
};
|
||||
}
|
||||
|
||||
Some((acc, must_create_index))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use meilisearch_types::tasks::IndexSwap;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::*;
|
||||
use crate::debug_snapshot;
|
||||
|
||||
fn autobatch_from(
|
||||
index_already_exists: bool,
|
||||
primary_key: Option<&str>,
|
||||
input: impl IntoIterator<Item = KindWithContent>,
|
||||
) -> Option<(BatchKind, bool)> {
|
||||
autobatch(
|
||||
input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(),
|
||||
index_already_exists,
|
||||
primary_key,
|
||||
)
|
||||
}
|
||||
|
||||
fn doc_imp(
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<&str>,
|
||||
) -> KindWithContent {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: String::from("doggo"),
|
||||
primary_key: primary_key.map(|pk| pk.to_string()),
|
||||
method,
|
||||
content_file: Uuid::new_v4(),
|
||||
documents_count: 0,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_del() -> KindWithContent {
|
||||
KindWithContent::DocumentDeletion {
|
||||
index_uid: String::from("doggo"),
|
||||
documents_ids: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_del_fil() -> KindWithContent {
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
index_uid: String::from("doggo"),
|
||||
filter_expr: serde_json::json!("cuteness > 100"),
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_clr() -> KindWithContent {
|
||||
KindWithContent::DocumentClear { index_uid: String::from("doggo") }
|
||||
}
|
||||
|
||||
fn settings(allow_index_creation: bool) -> KindWithContent {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: String::from("doggo"),
|
||||
new_settings: Default::default(),
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
|
||||
fn idx_create() -> KindWithContent {
|
||||
KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None }
|
||||
}
|
||||
|
||||
fn idx_update() -> KindWithContent {
|
||||
KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None }
|
||||
}
|
||||
|
||||
fn idx_del() -> KindWithContent {
|
||||
KindWithContent::IndexDeletion { index_uid: String::from("doggo") }
|
||||
}
|
||||
|
||||
fn idx_swap() -> KindWithContent {
|
||||
KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn autobatch_simple_operation_together() {
|
||||
// we can autobatch one or multiple `ReplaceDocuments` together.
|
||||
// if the index exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// if it doesn't exists.
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
// we can autobatch one or multiple `UpdateDocuments` together.
|
||||
// if the index exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// if it doesn't exists.
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// we can autobatch one or multiple DocumentDeletion together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
|
||||
|
||||
// we can autobatch one or multiple DocumentDeletionByFilter together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
|
||||
|
||||
// we can autobatch one or multiple Settings together
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// We can autobatch document addition with document deletion
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
|
||||
// But we can't autobatch document addition with document deletion by filter
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_document_operation_dont_autobatch_with_other() {
|
||||
// addition, updates and deletion by filter can't batch together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn document_addition_doesnt_batch_with_settings() {
|
||||
// simple case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// multiple settings and doc addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
|
||||
// addition and setting unordered
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// Doesn't batch with other forbidden operations
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn clear_and_additions() {
|
||||
// these two doesn't need to batch
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
|
||||
// Basic use case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
|
||||
// This batch kind doesn't mix with other document addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
|
||||
// But you can batch multiple clear together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn clear_and_additions_and_settings() {
|
||||
// A clear don't need to autobatch the settings that happens AFTER there is no documents
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn anything_and_index_deletion() {
|
||||
// The `IndexDeletion` doesn't batch with anything that happens AFTER.
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
|
||||
// The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`.
|
||||
// First, the basic cases
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allowed_and_disallowed_index_creation() {
|
||||
// `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
// batch deletion and addition
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn autobatch_primary_key() {
|
||||
// ==> If I have a pk
|
||||
// With a single update
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// With a multiple updates
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// ==> If I don't have a pk
|
||||
// With a single update
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// With a multiple updates
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
216
crates/index-scheduler/src/dump.rs
Normal file
216
crates/index-scheduler/src/dump.rs
Normal file
@@ -0,0 +1,216 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
|
||||
use dump::{KindDump, TaskDump, UpdateFile};
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{utils, Error, IndexScheduler, Result};
|
||||
|
||||
pub struct Dump<'a> {
|
||||
index_scheduler: &'a IndexScheduler,
|
||||
wtxn: RwTxn<'a>,
|
||||
|
||||
indexes: HashMap<String, RoaringBitmap>,
|
||||
statuses: HashMap<Status, RoaringBitmap>,
|
||||
kinds: HashMap<Kind, RoaringBitmap>,
|
||||
}
|
||||
|
||||
impl<'a> Dump<'a> {
|
||||
pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result<Self> {
|
||||
// While loading a dump no one should be able to access the scheduler thus I can block everything.
|
||||
let wtxn = index_scheduler.env.write_txn()?;
|
||||
|
||||
Ok(Dump {
|
||||
index_scheduler,
|
||||
wtxn,
|
||||
indexes: HashMap::new(),
|
||||
statuses: HashMap::new(),
|
||||
kinds: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Register a new task coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_task(
|
||||
&mut self,
|
||||
task: TaskDump,
|
||||
content_file: Option<Box<UpdateFile>>,
|
||||
) -> Result<Task> {
|
||||
let task_has_no_docs = matches!(task.kind, KindDump::DocumentImport { documents_count, .. } if documents_count == 0);
|
||||
|
||||
let content_uuid = match content_file {
|
||||
Some(content_file) if task.status == Status::Enqueued => {
|
||||
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
|
||||
let mut writer = io::BufWriter::new(file);
|
||||
for doc in content_file {
|
||||
let doc = doc?;
|
||||
serde_json::to_writer(&mut writer, &doc).map_err(|e| {
|
||||
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
|
||||
})?;
|
||||
}
|
||||
let file = writer.into_inner().map_err(|e| e.into_error())?;
|
||||
file.persist()?;
|
||||
|
||||
Some(uuid)
|
||||
}
|
||||
// If the task isn't `Enqueued` then just generate a recognisable `Uuid`
|
||||
// in case we try to open it later.
|
||||
_ if task.status != Status::Enqueued => Some(Uuid::nil()),
|
||||
None if task.status == Status::Enqueued && task_has_no_docs => {
|
||||
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
|
||||
file.persist()?;
|
||||
|
||||
Some(uuid)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let task = Task {
|
||||
uid: task.uid,
|
||||
batch_uid: task.batch_uid,
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
error: task.error,
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details,
|
||||
status: task.status,
|
||||
kind: match task.kind {
|
||||
KindDump::DocumentImport {
|
||||
primary_key,
|
||||
method,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
} => KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
method,
|
||||
content_file: content_uuid.ok_or(Error::CorruptedDump)?,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
},
|
||||
KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
|
||||
documents_ids,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::DocumentDeletionByFilter { filter } => {
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
filter_expr: filter,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
}
|
||||
}
|
||||
KindDump::DocumentEdition { filter, context, function } => {
|
||||
KindWithContent::DocumentEdition {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
filter_expr: filter,
|
||||
context,
|
||||
function,
|
||||
}
|
||||
}
|
||||
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::Settings { settings, is_deletion, allow_index_creation } => {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
new_settings: settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
KindDump::IndexDeletion => KindWithContent::IndexDeletion {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
|
||||
KindDump::TaskCancelation { query, tasks } => {
|
||||
KindWithContent::TaskCancelation { query, tasks }
|
||||
}
|
||||
KindDump::TasksDeletion { query, tasks } => {
|
||||
KindWithContent::TaskDeletion { query, tasks }
|
||||
}
|
||||
KindDump::DumpCreation { keys, instance_uid } => {
|
||||
KindWithContent::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
|
||||
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
|
||||
},
|
||||
};
|
||||
|
||||
self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
|
||||
|
||||
for index in task.indexes() {
|
||||
match self.indexes.get_mut(index) {
|
||||
Some(bitmap) => {
|
||||
bitmap.insert(task.uid);
|
||||
}
|
||||
None => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(task.uid);
|
||||
self.indexes.insert(index.to_string(), bitmap);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.enqueued_at,
|
||||
task.enqueued_at,
|
||||
task.uid,
|
||||
)?;
|
||||
|
||||
// we can't override the started_at & finished_at, so we must only set it if the tasks is finished and won't change
|
||||
if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
|
||||
if let Some(started_at) = task.started_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.started_at,
|
||||
started_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.finished_at,
|
||||
finished_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.statuses.entry(task.status).or_default().insert(task.uid);
|
||||
self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Commit all the changes and exit the importing dump state
|
||||
pub fn finish(mut self) -> Result<()> {
|
||||
for (index, bitmap) in self.indexes {
|
||||
self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
|
||||
}
|
||||
for (status, bitmap) in self.statuses {
|
||||
self.index_scheduler.queue.tasks.put_status(&mut self.wtxn, status, &bitmap)?;
|
||||
}
|
||||
for (kind, bitmap) in self.kinds {
|
||||
self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?;
|
||||
}
|
||||
|
||||
self.wtxn.commit()?;
|
||||
self.index_scheduler.scheduler.wake_up.signal();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -147,7 +147,9 @@ pub enum Error {
|
||||
#[error("Corrupted task queue.")]
|
||||
CorruptedTaskQueue,
|
||||
#[error(transparent)]
|
||||
TaskDatabaseUpdate(Box<Self>),
|
||||
DatabaseUpgrade(Box<Self>),
|
||||
#[error(transparent)]
|
||||
UnrecoverableError(Box<Self>),
|
||||
#[error(transparent)]
|
||||
HeedTransaction(heed::Error),
|
||||
|
||||
@@ -202,7 +204,8 @@ impl Error {
|
||||
| Error::Anyhow(_) => true,
|
||||
Error::CreateBatch(_)
|
||||
| Error::CorruptedTaskQueue
|
||||
| Error::TaskDatabaseUpdate(_)
|
||||
| Error::DatabaseUpgrade(_)
|
||||
| Error::UnrecoverableError(_)
|
||||
| Error::HeedTransaction(_) => false,
|
||||
#[cfg(test)]
|
||||
Error::PlannedFailure => false,
|
||||
@@ -266,7 +269,8 @@ impl ErrorCode for Error {
|
||||
Error::Anyhow(_) => Code::Internal,
|
||||
Error::CorruptedTaskQueue => Code::Internal,
|
||||
Error::CorruptedDump => Code::Internal,
|
||||
Error::TaskDatabaseUpdate(_) => Code::Internal,
|
||||
Error::DatabaseUpgrade(_) => Code::Internal,
|
||||
Error::UnrecoverableError(_) => Code::Internal,
|
||||
Error::CreateBatch(_) => Code::Internal,
|
||||
|
||||
// This one should never be seen by the end user
|
||||
|
||||
@@ -7,7 +7,12 @@ use meilisearch_types::heed::{Database, Env, RwTxn};
|
||||
use crate::error::FeatureNotEnabledError;
|
||||
use crate::Result;
|
||||
|
||||
const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
/// The number of database used by features
|
||||
const NUMBER_OF_DATABASES: u32 = 1;
|
||||
/// Database const names for the `FeatureData`.
|
||||
mod db_name {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct FeatureData {
|
||||
@@ -56,19 +61,6 @@ impl RoFeatures {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.vector_store {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "vector store",
|
||||
issue_link: "https://github.com/meilisearch/product/discussions/677",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_edit_documents_by_function(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.edit_documents_by_function {
|
||||
Ok(())
|
||||
@@ -97,14 +89,20 @@ impl RoFeatures {
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
|
||||
wtxn.commit()?;
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
env: &Env,
|
||||
wtxn: &mut RwTxn,
|
||||
instance_features: InstanceTogglableFeatures,
|
||||
) -> Result<Self> {
|
||||
let runtime_features_db =
|
||||
env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;
|
||||
|
||||
let txn = env.read_txn()?;
|
||||
let persisted_features: RuntimeTogglableFeatures =
|
||||
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
runtime_features_db.get(wtxn, db_name::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
|
||||
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
|
||||
metrics: metrics || persisted_features.metrics,
|
||||
@@ -121,7 +119,7 @@ impl FeatureData {
|
||||
mut wtxn: RwTxn,
|
||||
features: RuntimeTogglableFeatures,
|
||||
) -> Result<()> {
|
||||
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
|
||||
self.persisted.put(&mut wtxn, db_name::EXPERIMENTAL_FEATURES, &features)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
// safe to unwrap, the lock will only fail if:
|
||||
|
||||
@@ -102,7 +102,7 @@ impl ReopenableIndex {
|
||||
return Ok(());
|
||||
}
|
||||
map.unavailable.remove(&self.uuid);
|
||||
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
|
||||
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size, false)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -171,11 +171,12 @@ impl IndexMap {
|
||||
date: Option<(OffsetDateTime, OffsetDateTime)>,
|
||||
enable_mdb_writemap: bool,
|
||||
map_size: usize,
|
||||
creation: bool,
|
||||
) -> Result<Index> {
|
||||
if !matches!(self.get_unavailable(uuid), Missing) {
|
||||
panic!("Attempt to open an index that was unavailable");
|
||||
}
|
||||
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
|
||||
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size, creation)?;
|
||||
match self.available.insert(*uuid, index.clone()) {
|
||||
InsertionOutcome::InsertedNew => (),
|
||||
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
|
||||
@@ -299,6 +300,7 @@ fn create_or_open_index(
|
||||
date: Option<(OffsetDateTime, OffsetDateTime)>,
|
||||
enable_mdb_writemap: bool,
|
||||
map_size: usize,
|
||||
creation: bool,
|
||||
) -> Result<Index> {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(clamp_to_page_size(map_size));
|
||||
@@ -308,9 +310,9 @@ fn create_or_open_index(
|
||||
}
|
||||
|
||||
if let Some((created, updated)) = date {
|
||||
Ok(Index::new_with_creation_dates(options, path, created, updated)?)
|
||||
Ok(Index::new_with_creation_dates(options, path, created, updated, creation)?)
|
||||
} else {
|
||||
Ok(Index::new(options, path)?)
|
||||
Ok(Index::new(options, path, creation)?)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -323,7 +325,7 @@ mod tests {
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::super::IndexMapper;
|
||||
use crate::tests::IndexSchedulerHandle;
|
||||
use crate::test_utils::IndexSchedulerHandle;
|
||||
use crate::utils::clamp_to_page_size;
|
||||
use crate::IndexScheduler;
|
||||
|
||||
|
||||
@@ -16,12 +16,17 @@ use uuid::Uuid;
|
||||
use self::index_map::IndexMap;
|
||||
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
|
||||
use crate::uuid_codec::UuidCodec;
|
||||
use crate::{Error, Result};
|
||||
use crate::{Error, IndexBudget, IndexSchedulerOptions, Result};
|
||||
|
||||
mod index_map;
|
||||
|
||||
const INDEX_MAPPING: &str = "index-mapping";
|
||||
const INDEX_STATS: &str = "index-stats";
|
||||
/// The number of database used by index mapper
|
||||
const NUMBER_OF_DATABASES: u32 = 2;
|
||||
/// Database const names for the `IndexMapper`.
|
||||
mod db_name {
|
||||
pub const INDEX_MAPPING: &str = "index-mapping";
|
||||
pub const INDEX_STATS: &str = "index-stats";
|
||||
}
|
||||
|
||||
/// Structure managing meilisearch's indexes.
|
||||
///
|
||||
@@ -106,6 +111,8 @@ pub struct IndexStats {
|
||||
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
|
||||
/// this value is typically smaller than `database_size`.
|
||||
pub used_database_size: u64,
|
||||
/// The primary key of the index
|
||||
pub primary_key: Option<String>,
|
||||
/// Association of every field name with the number of times it occurs in the documents.
|
||||
pub field_distribution: FieldDistribution,
|
||||
/// Creation date of the index.
|
||||
@@ -127,6 +134,7 @@ impl IndexStats {
|
||||
number_of_documents: index.number_of_documents(rtxn)?,
|
||||
database_size: index.on_disk_size()?,
|
||||
used_database_size: index.used_size()?,
|
||||
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
|
||||
field_distribution: index.field_distribution(rtxn)?,
|
||||
created_at: index.created_at(rtxn)?,
|
||||
updated_at: index.updated_at(rtxn)?,
|
||||
@@ -135,29 +143,25 @@ impl IndexStats {
|
||||
}
|
||||
|
||||
impl IndexMapper {
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
env: &Env,
|
||||
base_path: PathBuf,
|
||||
index_base_map_size: usize,
|
||||
index_growth_amount: usize,
|
||||
index_count: usize,
|
||||
enable_mdb_writemap: bool,
|
||||
indexer_config: IndexerConfig,
|
||||
wtxn: &mut RwTxn,
|
||||
options: &IndexSchedulerOptions,
|
||||
budget: IndexBudget,
|
||||
) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
|
||||
let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?;
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(Self {
|
||||
index_map: Arc::new(RwLock::new(IndexMap::new(index_count))),
|
||||
index_mapping,
|
||||
index_stats,
|
||||
base_path,
|
||||
index_base_map_size,
|
||||
index_growth_amount,
|
||||
enable_mdb_writemap,
|
||||
indexer_config: Arc::new(indexer_config),
|
||||
index_map: Arc::new(RwLock::new(IndexMap::new(budget.index_count))),
|
||||
index_mapping: env.create_database(wtxn, Some(db_name::INDEX_MAPPING))?,
|
||||
index_stats: env.create_database(wtxn, Some(db_name::INDEX_STATS))?,
|
||||
base_path: options.indexes_path.clone(),
|
||||
index_base_map_size: budget.map_size,
|
||||
index_growth_amount: options.index_growth_amount,
|
||||
enable_mdb_writemap: options.enable_mdb_writemap,
|
||||
indexer_config: options.indexer_config.clone(),
|
||||
currently_updating_index: Default::default(),
|
||||
})
|
||||
}
|
||||
@@ -194,8 +198,14 @@ impl IndexMapper {
|
||||
date,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
true,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
|
||||
self.store_stats_of(&mut wtxn, name, &stats)?;
|
||||
drop(index_rtxn);
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
@@ -387,6 +397,7 @@ impl IndexMapper {
|
||||
None,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
false,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
}
|
||||
|
||||
@@ -5,11 +5,12 @@ use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Details, Task};
|
||||
use meilisearch_types::tasks::{Details, Kind, Status, Task};
|
||||
use meilisearch_types::versioning;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::{IndexScheduler, Kind, Status, BEI128};
|
||||
use crate::{IndexScheduler, BEI128};
|
||||
|
||||
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
// Since we'll snapshot the index right afterward, we don't need to ensure it's internally consistent for every run.
|
||||
@@ -18,41 +19,15 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
scheduler.assert_internally_consistent();
|
||||
|
||||
let IndexScheduler {
|
||||
autobatching_enabled,
|
||||
cleanup_enabled: _,
|
||||
must_stop_processing: _,
|
||||
processing_tasks,
|
||||
file_store,
|
||||
env,
|
||||
all_tasks,
|
||||
all_batches,
|
||||
batch_to_tasks_mapping,
|
||||
// task reverse index
|
||||
status,
|
||||
kind,
|
||||
index_tasks,
|
||||
canceled_by,
|
||||
enqueued_at,
|
||||
started_at,
|
||||
finished_at,
|
||||
|
||||
// batch reverse index
|
||||
batch_status,
|
||||
batch_kind,
|
||||
batch_index_tasks,
|
||||
batch_enqueued_at,
|
||||
batch_started_at,
|
||||
batch_finished_at,
|
||||
version,
|
||||
queue,
|
||||
scheduler,
|
||||
|
||||
index_mapper,
|
||||
features: _,
|
||||
max_number_of_tasks: _,
|
||||
max_number_of_batched_tasks: _,
|
||||
wake_up: _,
|
||||
dumps_path: _,
|
||||
snapshots_path: _,
|
||||
auth_path: _,
|
||||
version_file_path: _,
|
||||
webhook_url: _,
|
||||
webhook_authorization_header: _,
|
||||
test_breakpoint_sdr: _,
|
||||
@@ -65,8 +40,18 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
|
||||
let mut snap = String::new();
|
||||
|
||||
let indx_sched_version = version.get_version(&rtxn).unwrap();
|
||||
let latest_version = (
|
||||
versioning::VERSION_MAJOR.parse().unwrap(),
|
||||
versioning::VERSION_MINOR.parse().unwrap(),
|
||||
versioning::VERSION_PATCH.parse().unwrap(),
|
||||
);
|
||||
if indx_sched_version != Some(latest_version) {
|
||||
snap.push_str(&format!("index scheduler running on version {indx_sched_version:?}\n"));
|
||||
}
|
||||
|
||||
let processing = processing_tasks.read().unwrap().clone();
|
||||
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
|
||||
snap.push_str(&format!("### Autobatching Enabled = {}\n", scheduler.autobatching_enabled));
|
||||
snap.push_str(&format!(
|
||||
"### Processing batch {:?}:\n",
|
||||
processing.batch.as_ref().map(|batch| batch.uid)
|
||||
@@ -79,19 +64,19 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### All Tasks:\n");
|
||||
snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks));
|
||||
snap.push_str(&snapshot_all_tasks(&rtxn, queue.tasks.all_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Status:\n");
|
||||
snap.push_str(&snapshot_status(&rtxn, *status));
|
||||
snap.push_str(&snapshot_status(&rtxn, queue.tasks.status));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Kind:\n");
|
||||
snap.push_str(&snapshot_kind(&rtxn, *kind));
|
||||
snap.push_str(&snapshot_kind(&rtxn, queue.tasks.kind));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Index Tasks:\n");
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks));
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, queue.tasks.index_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Index Mapper:\n");
|
||||
@@ -99,55 +84,55 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Canceled By:\n");
|
||||
snap.push_str(&snapshot_canceled_by(&rtxn, *canceled_by));
|
||||
snap.push_str(&snapshot_canceled_by(&rtxn, queue.tasks.canceled_by));
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Enqueued At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.enqueued_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Started At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *started_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.started_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Finished At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *finished_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.finished_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### All Batches:\n");
|
||||
snap.push_str(&snapshot_all_batches(&rtxn, *all_batches));
|
||||
snap.push_str(&snapshot_all_batches(&rtxn, queue.batches.all_batches));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batch to tasks mapping:\n");
|
||||
snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, *batch_to_tasks_mapping));
|
||||
snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, queue.batch_to_tasks_mapping));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Status:\n");
|
||||
snap.push_str(&snapshot_status(&rtxn, *batch_status));
|
||||
snap.push_str(&snapshot_status(&rtxn, queue.batches.status));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Kind:\n");
|
||||
snap.push_str(&snapshot_kind(&rtxn, *batch_kind));
|
||||
snap.push_str(&snapshot_kind(&rtxn, queue.batches.kind));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Index Tasks:\n");
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, *batch_index_tasks));
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, queue.batches.index_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Enqueued At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_enqueued_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.enqueued_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Started At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_started_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.started_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Finished At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_finished_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.finished_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### File Store:\n");
|
||||
snap.push_str(&snapshot_file_store(file_store));
|
||||
snap.push_str(&snapshot_file_store(&queue.file_store));
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap
|
||||
@@ -306,6 +291,9 @@ fn snapshot_details(d: &Details) -> String {
|
||||
Details::IndexSwap { swaps } => {
|
||||
format!("{{ swaps: {swaps:?} }}")
|
||||
}
|
||||
Details::UpgradeDatabase { from, to } => {
|
||||
format!("{{ from: {from:?}, to: {to:?} }}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,14 +1,12 @@
|
||||
use std::borrow::Cow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use enum_iterator::Sequence;
|
||||
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step};
|
||||
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView};
|
||||
use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::utils::ProcessingBatch;
|
||||
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Default)]
|
||||
pub struct ProcessingTasks {
|
||||
pub batch: Option<Arc<ProcessingBatch>>,
|
||||
/// The list of tasks ids that are currently running.
|
||||
@@ -20,7 +18,7 @@ pub struct ProcessingTasks {
|
||||
impl ProcessingTasks {
|
||||
/// Creates an empty `ProcessingAt` struct.
|
||||
pub fn new() -> ProcessingTasks {
|
||||
ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None }
|
||||
ProcessingTasks::default()
|
||||
}
|
||||
|
||||
pub fn get_progress_view(&self) -> Option<ProgressView> {
|
||||
@@ -173,32 +171,6 @@ make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
|
||||
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
|
||||
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
|
||||
|
||||
pub struct VariableNameStep {
|
||||
name: String,
|
||||
current: u32,
|
||||
total: u32,
|
||||
}
|
||||
|
||||
impl VariableNameStep {
|
||||
pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
|
||||
Self { name: name.into(), current, total }
|
||||
}
|
||||
}
|
||||
|
||||
impl Step for VariableNameStep {
|
||||
fn name(&self) -> Cow<'static, str> {
|
||||
self.name.clone().into()
|
||||
}
|
||||
|
||||
fn current(&self) -> u32 {
|
||||
self.current
|
||||
}
|
||||
|
||||
fn total(&self) -> u32 {
|
||||
self.total
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
611
crates/index-scheduler/src/queue/batches.rs
Normal file
611
crates/index-scheduler/src/queue/batches.rs
Normal file
@@ -0,0 +1,611 @@
|
||||
use std::collections::HashSet;
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
|
||||
use meilisearch_types::batches::{Batch, BatchId};
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, Status};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{Query, Queue};
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
insert_task_datetime, keep_ids_within_datetimes, map_bound, remove_task_datetime,
|
||||
ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, Result, BEI128};
|
||||
|
||||
/// The number of database used by the batch queue
|
||||
const NUMBER_OF_DATABASES: u32 = 7;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const ALL_BATCHES: &str = "all-batches";
|
||||
|
||||
pub const BATCH_STATUS: &str = "batch-status";
|
||||
pub const BATCH_KIND: &str = "batch-kind";
|
||||
pub const BATCH_INDEX_TASKS: &str = "batch-index-tasks";
|
||||
pub const BATCH_ENQUEUED_AT: &str = "batch-enqueued-at";
|
||||
pub const BATCH_STARTED_AT: &str = "batch-started-at";
|
||||
pub const BATCH_FINISHED_AT: &str = "batch-finished-at";
|
||||
}
|
||||
|
||||
pub struct BatchQueue {
|
||||
/// Contains all the batches accessible by their Id.
|
||||
pub(crate) all_batches: Database<BEU32, SerdeJson<Batch>>,
|
||||
|
||||
/// All the batches containing a task matching the selected status.
|
||||
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the batches ids grouped by the kind of their task.
|
||||
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the batches associated to an index.
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
/// Store the batches containing tasks which were enqueued at a specific date
|
||||
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the batches containing finished tasks started at a specific date
|
||||
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the batches containing tasks finished at a specific date
|
||||
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
}
|
||||
|
||||
impl BatchQueue {
|
||||
pub(crate) fn private_clone(&self) -> BatchQueue {
|
||||
BatchQueue {
|
||||
all_batches: self.all_batches,
|
||||
status: self.status,
|
||||
kind: self.kind,
|
||||
index_tasks: self.index_tasks,
|
||||
enqueued_at: self.enqueued_at,
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
|
||||
Ok(Self {
|
||||
all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
|
||||
status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,
|
||||
kind: env.create_database(wtxn, Some(db_name::BATCH_KIND))?,
|
||||
index_tasks: env.create_database(wtxn, Some(db_name::BATCH_INDEX_TASKS))?,
|
||||
enqueued_at: env.create_database(wtxn, Some(db_name::BATCH_ENQUEUED_AT))?,
|
||||
started_at: env.create_database(wtxn, Some(db_name::BATCH_STARTED_AT))?,
|
||||
finished_at: env.create_database(wtxn, Some(db_name::BATCH_FINISHED_AT))?,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn all_batch_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
|
||||
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
|
||||
}
|
||||
|
||||
pub(crate) fn next_batch_id(&self, rtxn: &RoTxn) -> Result<BatchId> {
|
||||
Ok(self
|
||||
.all_batches
|
||||
.remap_data_type::<DecodeIgnore>()
|
||||
.last(rtxn)?
|
||||
.map(|(k, _)| k + 1)
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn get_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<Option<Batch>> {
|
||||
Ok(self.all_batches.get(rtxn, &batch_id)?)
|
||||
}
|
||||
|
||||
/// Returns the whole set of batches that belongs to this index.
|
||||
pub(crate) fn index_batches(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
|
||||
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn update_index(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
index: &str,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut batches = self.index_batches(wtxn, index)?;
|
||||
f(&mut batches);
|
||||
if batches.is_empty() {
|
||||
self.index_tasks.delete(wtxn, index)?;
|
||||
} else {
|
||||
self.index_tasks.put(wtxn, index, &batches)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
|
||||
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.status.put(wtxn, &status, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_status(wtxn, status)?;
|
||||
f(&mut tasks);
|
||||
self.put_status(wtxn, status, &tasks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
|
||||
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.kind.put(wtxn, &kind, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_kind(wtxn, kind)?;
|
||||
f(&mut tasks);
|
||||
self.put_kind(wtxn, kind, &tasks)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> {
|
||||
let old_batch = self.all_batches.get(wtxn, &batch.uid)?;
|
||||
|
||||
self.all_batches.put(
|
||||
wtxn,
|
||||
&batch.uid,
|
||||
&Batch {
|
||||
uid: batch.uid,
|
||||
progress: None,
|
||||
details: batch.details,
|
||||
stats: batch.stats,
|
||||
started_at: batch.started_at,
|
||||
finished_at: batch.finished_at,
|
||||
},
|
||||
)?;
|
||||
|
||||
// Update the statuses
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
for status in old_batch.stats.status.keys() {
|
||||
self.update_status(wtxn, *status, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for status in batch.statuses {
|
||||
self.update_status(wtxn, status, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the kinds / types
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
let kinds: HashSet<_> = old_batch.stats.types.keys().cloned().collect();
|
||||
for kind in kinds.difference(&batch.kinds) {
|
||||
self.update_kind(wtxn, *kind, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for kind in batch.kinds {
|
||||
self.update_kind(wtxn, kind, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the indexes
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
let indexes: HashSet<_> = old_batch.stats.index_uids.keys().cloned().collect();
|
||||
for index in indexes.difference(&batch.indexes) {
|
||||
self.update_index(wtxn, index, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for index in batch.indexes {
|
||||
self.update_index(wtxn, &index, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the enqueued_at: we cannot retrieve the previous enqueued at from the previous batch, and
|
||||
// must instead go through the db looking for it. We cannot look at the task contained in this batch either
|
||||
// because they may have been removed.
|
||||
// What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written
|
||||
// to the DB per batches.
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
let started_at = old_batch.started_at.unix_timestamp_nanos();
|
||||
|
||||
// We have either one or two enqueued at to remove
|
||||
let mut exit = old_batch.stats.total_nb_tasks.clamp(0, 2);
|
||||
let mut iterator = self.enqueued_at.rev_iter_mut(wtxn)?;
|
||||
while let Some(entry) = iterator.next() {
|
||||
let (key, mut value) = entry?;
|
||||
if key > started_at {
|
||||
continue;
|
||||
}
|
||||
if value.remove(old_batch.uid) {
|
||||
exit = exit.saturating_sub(1);
|
||||
// Safe because the key and value are owned
|
||||
unsafe {
|
||||
iterator.put_current(&key, &value)?;
|
||||
}
|
||||
if exit == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(enqueued_at) = batch.oldest_enqueued_at {
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
|
||||
}
|
||||
if let Some(enqueued_at) = batch.earliest_enqueued_at {
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
|
||||
}
|
||||
|
||||
// Update the started at and finished at
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
remove_task_datetime(wtxn, self.started_at, old_batch.started_at, old_batch.uid)?;
|
||||
if let Some(finished_at) = old_batch.finished_at {
|
||||
remove_task_datetime(wtxn, self.finished_at, finished_at, old_batch.uid)?;
|
||||
}
|
||||
}
|
||||
insert_task_datetime(wtxn, self.started_at, batch.started_at, batch.uid)?;
|
||||
insert_task_datetime(wtxn, self.finished_at, batch.finished_at.unwrap(), batch.uid)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of batches. The batches MUST exist or a
|
||||
/// `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_batches(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
tasks: impl IntoIterator<Item = BatchId>,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<Vec<Batch>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|batch_id| {
|
||||
if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
let mut batch = processing.batch.as_ref().unwrap().to_batch();
|
||||
batch.progress = processing.get_progress_view();
|
||||
Ok(batch)
|
||||
} else {
|
||||
self.get_batch(rtxn, batch_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
|
||||
}
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
/// Return the batch ids matched by the given query from the index scheduler's point of view.
|
||||
pub(crate) fn get_batch_ids(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let Query {
|
||||
limit,
|
||||
from,
|
||||
reverse,
|
||||
uids,
|
||||
batch_uids,
|
||||
statuses,
|
||||
types,
|
||||
index_uids,
|
||||
canceled_by,
|
||||
before_enqueued_at,
|
||||
after_enqueued_at,
|
||||
before_started_at,
|
||||
after_started_at,
|
||||
before_finished_at,
|
||||
after_finished_at,
|
||||
} = query;
|
||||
|
||||
let mut batches = self.batches.all_batch_ids(rtxn)?;
|
||||
if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
batches.insert(batch_id);
|
||||
}
|
||||
|
||||
if let Some(from) = from {
|
||||
let range = if reverse.unwrap_or_default() {
|
||||
u32::MIN..*from
|
||||
} else {
|
||||
from.saturating_add(1)..u32::MAX
|
||||
};
|
||||
batches.remove_range(range);
|
||||
}
|
||||
|
||||
if let Some(batch_uids) = &batch_uids {
|
||||
let batches_uids = RoaringBitmap::from_iter(batch_uids);
|
||||
batches &= batches_uids;
|
||||
}
|
||||
|
||||
if let Some(status) = &statuses {
|
||||
let mut status_batches = RoaringBitmap::new();
|
||||
for status in status {
|
||||
match status {
|
||||
// special case for Processing batches
|
||||
Status::Processing => {
|
||||
if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
status_batches.insert(batch_id);
|
||||
}
|
||||
}
|
||||
// Enqueued tasks are not stored in batches
|
||||
Status::Enqueued => (),
|
||||
status => status_batches |= &self.batches.get_status(rtxn, *status)?,
|
||||
};
|
||||
}
|
||||
if !status.contains(&Status::Processing) {
|
||||
if let Some(ref batch) = processing.batch {
|
||||
batches.remove(batch.uid);
|
||||
}
|
||||
}
|
||||
batches &= status_batches;
|
||||
}
|
||||
|
||||
if let Some(task_uids) = &uids {
|
||||
let mut batches_by_task_uids = RoaringBitmap::new();
|
||||
for task_uid in task_uids {
|
||||
if let Some(task) = self.tasks.get_task(rtxn, *task_uid)? {
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
batches_by_task_uids.insert(batch_uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
batches &= batches_by_task_uids;
|
||||
}
|
||||
|
||||
// There is no database for this query, we must retrieve the task queried by the client and ensure it's valid
|
||||
if let Some(canceled_by) = &canceled_by {
|
||||
let mut all_canceled_batches = RoaringBitmap::new();
|
||||
for cancel_uid in canceled_by {
|
||||
if let Some(task) = self.tasks.get_task(rtxn, *cancel_uid)? {
|
||||
if task.kind.as_kind() == Kind::TaskCancelation
|
||||
&& task.status == Status::Succeeded
|
||||
{
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
all_canceled_batches.insert(batch_uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if the canceled_by has been specified but no batch
|
||||
// matches then we prefer matching zero than all batches.
|
||||
if all_canceled_batches.is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
} else {
|
||||
batches &= all_canceled_batches;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(kind) = &types {
|
||||
let mut kind_batches = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_batches |= self.batches.get_kind(rtxn, *kind)?;
|
||||
if let Some(uid) = processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.and_then(|batch| batch.kinds.contains(kind).then_some(batch.uid))
|
||||
{
|
||||
kind_batches.insert(uid);
|
||||
}
|
||||
}
|
||||
batches &= &kind_batches;
|
||||
}
|
||||
|
||||
if let Some(index) = &index_uids {
|
||||
let mut index_batches = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_batches |= self.batches.index_batches(rtxn, index)?;
|
||||
if let Some(uid) = processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.and_then(|batch| batch.indexes.contains(index).then_some(batch.uid))
|
||||
{
|
||||
index_batches.insert(uid);
|
||||
}
|
||||
}
|
||||
batches &= &index_batches;
|
||||
}
|
||||
|
||||
// For the started_at filter, we need to treat the part of the batches that are processing from the part of the
|
||||
// batches that are not processing. The non-processing ones are filtered normally while the processing ones
|
||||
// are entirely removed unless the in-memory startedAt variable falls within the date filter.
|
||||
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
|
||||
batches = {
|
||||
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
|
||||
(&batches - &*processing.processing, &batches & &*processing.processing);
|
||||
|
||||
// special case for Processing batches
|
||||
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
|
||||
let mut clear_filtered_processing_batches =
|
||||
|start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| {
|
||||
let start = map_bound(start, |b| b.unix_timestamp_nanos());
|
||||
let end = map_bound(end, |b| b.unix_timestamp_nanos());
|
||||
let is_within_dates = RangeBounds::contains(
|
||||
&(start, end),
|
||||
&processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at)
|
||||
.unix_timestamp_nanos(),
|
||||
);
|
||||
if !is_within_dates {
|
||||
filtered_processing_batches.clear();
|
||||
}
|
||||
};
|
||||
match (after_started_at, before_started_at) {
|
||||
(None, None) => (),
|
||||
(None, Some(before)) => {
|
||||
clear_filtered_processing_batches(Bound::Unbounded, Bound::Excluded(*before))
|
||||
}
|
||||
(Some(after), None) => {
|
||||
clear_filtered_processing_batches(Bound::Excluded(*after), Bound::Unbounded)
|
||||
}
|
||||
(Some(after), Some(before)) => clear_filtered_processing_batches(
|
||||
Bound::Excluded(*after),
|
||||
Bound::Excluded(*before),
|
||||
),
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut filtered_non_processing_batches,
|
||||
self.batches.started_at,
|
||||
*after_started_at,
|
||||
*before_started_at,
|
||||
)?;
|
||||
filtered_non_processing_batches | filtered_processing_batches
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut batches,
|
||||
self.batches.enqueued_at,
|
||||
*after_enqueued_at,
|
||||
*before_enqueued_at,
|
||||
)?;
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut batches,
|
||||
self.batches.finished_at,
|
||||
*after_finished_at,
|
||||
*before_finished_at,
|
||||
)?;
|
||||
|
||||
if let Some(limit) = limit {
|
||||
batches = if query.reverse.unwrap_or_default() {
|
||||
batches.into_iter().take(*limit as usize).collect()
|
||||
} else {
|
||||
batches.into_iter().rev().take(*limit as usize).collect()
|
||||
};
|
||||
}
|
||||
|
||||
Ok(batches)
|
||||
}
|
||||
|
||||
/// Return the batch ids matching the query along with the total number of batches
|
||||
/// by ignoring the from and limit parameters from the user's point of view.
|
||||
///
|
||||
/// There are two differences between an internal query and a query executed by
|
||||
/// the user.
|
||||
///
|
||||
/// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
|
||||
/// with many indexes internally.
|
||||
/// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
|
||||
pub(crate) fn get_batch_ids_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<(RoaringBitmap, u64)> {
|
||||
// compute all batches matching the filter by ignoring the limits, to find the number of batches matching
|
||||
// the filter.
|
||||
// As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares
|
||||
// us from modifying the underlying implementation, and the performance remains sufficient.
|
||||
// Should this change, we would modify `get_batch_ids` to directly return the number of matching batches.
|
||||
let total_batches =
|
||||
self.get_batch_ids(rtxn, &query.clone().without_limits(), processing)?;
|
||||
let mut batches = self.get_batch_ids(rtxn, query, processing)?;
|
||||
|
||||
// If the query contains a list of index uid or there is a finite list of authorized indexes,
|
||||
// then we must exclude all the batches that only contains tasks associated to multiple indexes.
|
||||
// This works because we don't autobatch tasks associated to multiple indexes with tasks associated
|
||||
// to a single index. e.g: IndexSwap cannot be batched with IndexCreation.
|
||||
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
|
||||
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
|
||||
batches -= self.tasks.get_kind(rtxn, kind)?;
|
||||
if let Some(batch) = processing.batch.as_ref() {
|
||||
if batch.kinds.contains(&kind) {
|
||||
batches.remove(batch.uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Any batch that is internally associated with at least one authorized index
|
||||
// must be returned.
|
||||
if !filters.all_indexes_authorized() {
|
||||
let mut valid_indexes = RoaringBitmap::new();
|
||||
let mut forbidden_indexes = RoaringBitmap::new();
|
||||
|
||||
let all_indexes_iter = self.batches.index_tasks.iter(rtxn)?;
|
||||
for result in all_indexes_iter {
|
||||
let (index, index_tasks) = result?;
|
||||
if filters.is_index_authorized(index) {
|
||||
valid_indexes |= index_tasks;
|
||||
} else {
|
||||
forbidden_indexes |= index_tasks;
|
||||
}
|
||||
}
|
||||
if let Some(batch) = processing.batch.as_ref() {
|
||||
for index in &batch.indexes {
|
||||
if filters.is_index_authorized(index) {
|
||||
valid_indexes.insert(batch.uid);
|
||||
} else {
|
||||
forbidden_indexes.insert(batch.uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a batch had ONE valid task then it should be returned
|
||||
let invalid_batches = forbidden_indexes - valid_indexes;
|
||||
|
||||
batches -= invalid_batches;
|
||||
}
|
||||
|
||||
Ok((batches, total_batches.len()))
|
||||
}
|
||||
|
||||
pub(crate) fn get_batches_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<(Vec<Batch>, u64)> {
|
||||
let (batches, total) =
|
||||
self.get_batch_ids_from_authorized_indexes(rtxn, query, filters, processing)?;
|
||||
let batches = if query.reverse.unwrap_or_default() {
|
||||
Box::new(batches.into_iter()) as Box<dyn Iterator<Item = u32>>
|
||||
} else {
|
||||
Box::new(batches.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
|
||||
};
|
||||
|
||||
let batches = self.batches.get_existing_batches(
|
||||
rtxn,
|
||||
batches.take(query.limit.unwrap_or(u32::MAX) as usize),
|
||||
processing,
|
||||
)?;
|
||||
|
||||
Ok((batches, total))
|
||||
}
|
||||
}
|
||||
473
crates/index-scheduler/src/queue/batches_test.rs
Normal file
473
crates/index-scheduler/src/queue/batches_test.rs
Normal file
@@ -0,0 +1,473 @@
|
||||
use meili_snap::snapshot;
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, FailureLocation};
|
||||
use crate::{IndexScheduler, Query};
|
||||
|
||||
#[test]
|
||||
fn query_batches_from_and_limit() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = index_creation_task("doggo", "bone");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
let kind = index_creation_task("whalo", "plankton");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
let kind = index_creation_task("catto", "his_own_vomit");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
|
||||
handle.advance_n_successful_batches(3);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
|
||||
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let query = Query { limit: Some(0), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query { limit: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query { limit: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query { from: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_simple() {
|
||||
let start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
|
||||
let (mut batches, _) = index_scheduler
|
||||
.get_batches_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
assert_eq!(batches.len(), 1);
|
||||
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
|
||||
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
|
||||
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
|
||||
snapshot!(batch, @r#"
|
||||
{
|
||||
"uid": 0,
|
||||
"details": {
|
||||
"primaryKey": "mouse"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
"status": {
|
||||
"processing": 1
|
||||
},
|
||||
"types": {
|
||||
"indexCreation": 1
|
||||
},
|
||||
"indexUids": {
|
||||
"catto": 1
|
||||
}
|
||||
},
|
||||
"startedAt": "1970-01-01T00:00:00Z",
|
||||
"finishedAt": null
|
||||
}
|
||||
"#);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]"); // The batches don't contains any enqueued tasks
|
||||
|
||||
let query =
|
||||
Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]"); // both enqueued and processing tasks in the first tick
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test, which should excludes the enqueued tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should excludes all of them
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should exclude the enqueued tasks and include the only processing task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-advancing-a-bit");
|
||||
|
||||
let second_start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should include all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the second part of the test and before one minute after the
|
||||
// second start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// we run the same query to verify that, and indeed find that the last task is matched
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// enqueued, succeeded, or processing tasks started after the second part of the test, should
|
||||
// again only return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
handle.advance_till([ProcessBatchFailed, AfterProcessing]);
|
||||
|
||||
// now the last task should have failed
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// so running the last query should return nothing
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![1]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with an invalid uid
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![2]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with a valid uid
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_special_rules() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
|
||||
// associated with doggo -> empty result
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
drop(rtxn);
|
||||
// We're going to advance and process all the batches for the next query to actually hit the db
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
]);
|
||||
handle.advance_one_successful_batch();
|
||||
handle.advance_n_failed_batches(2);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-processing-everything");
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
|
||||
// -> only the index creation of doggo should be returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![
|
||||
IndexUidPattern::new_unchecked("catto"),
|
||||
IndexUidPattern::new_unchecked("doggo"),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
|
||||
// -> all tasks except the swap of catto with whalo are returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// we asked for all the tasks with all index authorized -> all tasks returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,3,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_canceled_by() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
let kind = KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_string(),
|
||||
tasks: [0, 1, 2, 3].into_iter().collect(),
|
||||
};
|
||||
let task_cancelation = index_scheduler.register(kind, None, false).unwrap();
|
||||
handle.advance_n_successful_batches(1);
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// The batch zero was the index creation task, the 1 is the task cancellation
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
// Return only 1 because the user is not authorized to see task 2
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
}
|
||||
385
crates/index-scheduler/src/queue/mod.rs
Normal file
385
crates/index-scheduler/src/queue/mod.rs
Normal file
@@ -0,0 +1,385 @@
|
||||
mod batches;
|
||||
#[cfg(test)]
|
||||
mod batches_test;
|
||||
mod tasks;
|
||||
#[cfg(test)]
|
||||
mod tasks_test;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::time::Duration;
|
||||
|
||||
use file_store::FileStore;
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub(crate) use self::batches::BatchQueue;
|
||||
pub(crate) use self::tasks::TaskQueue;
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
check_index_swap_validity, filter_out_references_to_newer_tasks, ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, IndexSchedulerOptions, Result, TaskId};
|
||||
|
||||
/// The number of database used by queue itself
|
||||
const NUMBER_OF_DATABASES: u32 = 1;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping";
|
||||
}
|
||||
|
||||
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
|
||||
///
|
||||
/// An empty/default query (where each field is set to `None`) matches all tasks.
|
||||
/// Each non-null field restricts the set of tasks further.
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Query {
|
||||
/// The maximum number of tasks to be matched
|
||||
pub limit: Option<u32>,
|
||||
/// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched
|
||||
pub from: Option<u32>,
|
||||
/// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`.
|
||||
pub reverse: Option<bool>,
|
||||
/// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched
|
||||
pub uids: Option<Vec<TaskId>>,
|
||||
/// The [batch ids](`meilisearch_types::batches::Batch::uid`) to be matched
|
||||
pub batch_uids: Option<Vec<BatchId>>,
|
||||
/// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasls
|
||||
pub statuses: Option<Vec<Status>>,
|
||||
/// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks.
|
||||
///
|
||||
/// The kind of a task is given by:
|
||||
/// ```
|
||||
/// # use meilisearch_types::tasks::{Task, Kind};
|
||||
/// # fn doc_func(task: Task) -> Kind {
|
||||
/// task.kind.as_kind()
|
||||
/// # }
|
||||
/// ```
|
||||
pub types: Option<Vec<Kind>>,
|
||||
/// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks
|
||||
pub index_uids: Option<Vec<String>>,
|
||||
/// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::Task::Kind::TaskCancelation) tasks
|
||||
/// that canceled the matched tasks.
|
||||
pub canceled_by: Option<Vec<TaskId>>,
|
||||
/// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field.
|
||||
pub before_enqueued_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field.
|
||||
pub after_enqueued_at: Option<OffsetDateTime>,
|
||||
/// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field.
|
||||
pub before_started_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field.
|
||||
pub after_started_at: Option<OffsetDateTime>,
|
||||
/// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
|
||||
pub before_finished_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
|
||||
pub after_finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
impl Query {
|
||||
/// Return `true` if every field of the query is set to `None`, such that the query
|
||||
/// matches all tasks.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Query {
|
||||
limit: None,
|
||||
from: None,
|
||||
reverse: None,
|
||||
uids: None,
|
||||
batch_uids: None,
|
||||
statuses: None,
|
||||
types: None,
|
||||
index_uids: None,
|
||||
canceled_by: None,
|
||||
before_enqueued_at: None,
|
||||
after_enqueued_at: None,
|
||||
before_started_at: None,
|
||||
after_started_at: None,
|
||||
before_finished_at: None,
|
||||
after_finished_at: None,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes.
|
||||
pub fn with_index(self, index_uid: String) -> Self {
|
||||
let mut index_vec = self.index_uids.unwrap_or_default();
|
||||
index_vec.push(index_uid);
|
||||
Self { index_uids: Some(index_vec), ..self }
|
||||
}
|
||||
|
||||
// Removes the `from` and `limit` restrictions from the query.
|
||||
// Useful to get the total number of tasks matching a filter.
|
||||
pub fn without_limits(self) -> Self {
|
||||
Query { limit: None, from: None, ..self }
|
||||
}
|
||||
}
|
||||
|
||||
/// Structure which holds meilisearch's indexes and schedules the tasks
|
||||
/// to be performed on them.
|
||||
pub struct Queue {
|
||||
pub(crate) tasks: tasks::TaskQueue,
|
||||
pub(crate) batches: batches::BatchQueue,
|
||||
|
||||
/// Matches a batch id with the associated task ids.
|
||||
pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>,
|
||||
|
||||
/// The list of files referenced by the tasks.
|
||||
pub(crate) file_store: FileStore,
|
||||
|
||||
/// The max number of tasks allowed before the scheduler starts to delete
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
pub(crate) fn private_clone(&self) -> Queue {
|
||||
Queue {
|
||||
tasks: self.tasks.private_clone(),
|
||||
batches: self.batches.private_clone(),
|
||||
batch_to_tasks_mapping: self.batch_to_tasks_mapping,
|
||||
file_store: self.file_store.clone(),
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
tasks::TaskQueue::nb_db() + batches::BatchQueue::nb_db() + NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
/// Create an index scheduler and start its run loop.
|
||||
pub(crate) fn new(
|
||||
env: &Env,
|
||||
wtxn: &mut RwTxn,
|
||||
options: &IndexSchedulerOptions,
|
||||
) -> Result<Self> {
|
||||
// allow unreachable_code to get rids of the warning in the case of a test build.
|
||||
Ok(Self {
|
||||
file_store: FileStore::new(&options.update_file_path)?,
|
||||
batch_to_tasks_mapping: env
|
||||
.create_database(wtxn, Some(db_name::BATCH_TO_TASKS_MAPPING))?,
|
||||
tasks: TaskQueue::new(env, wtxn)?,
|
||||
batches: BatchQueue::new(env, wtxn)?,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the whole set of tasks that belongs to this batch.
|
||||
pub(crate) fn tasks_in_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<RoaringBitmap> {
|
||||
Ok(self.batch_to_tasks_mapping.get(rtxn, &batch_id)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of tasks and edit the `ProcessingBatch` to add the given tasks.
|
||||
///
|
||||
/// The tasks MUST exist, or a `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_tasks_for_processing_batch(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
processing_batch: &mut ProcessingBatch,
|
||||
tasks: impl IntoIterator<Item = TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|task_id| {
|
||||
let mut task = self
|
||||
.tasks
|
||||
.get_task(rtxn, task_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue));
|
||||
processing_batch.processing(&mut task);
|
||||
task
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
|
||||
pub(crate) fn write_batch(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
batch: ProcessingBatch,
|
||||
tasks: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
self.batch_to_tasks_mapping.put(wtxn, &batch.uid, tasks)?;
|
||||
self.batches.write_batch(wtxn, batch)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
|
||||
match task.content_uuid() {
|
||||
Some(content_file) => self.delete_update_file(content_file),
|
||||
None => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete a file from the index scheduler.
|
||||
///
|
||||
/// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.
|
||||
pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> {
|
||||
Ok(self.file_store.delete(uuid)?)
|
||||
}
|
||||
|
||||
/// Create a file and register it in the index scheduler.
|
||||
///
|
||||
/// The returned file and uuid can be used to associate
|
||||
/// some data to a task. The file will be kept until
|
||||
/// the task has been fully processed.
|
||||
pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> {
|
||||
if dry_run {
|
||||
Ok((Uuid::nil(), file_store::File::dry_file()?))
|
||||
} else {
|
||||
Ok(self.file_store.new_update()?)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> {
|
||||
Ok(self.file_store.new_update_with_uuid(uuid)?)
|
||||
}
|
||||
|
||||
/// The size on disk taken by all the updates files contained in the `IndexScheduler`, in bytes.
|
||||
pub fn compute_update_file_size(&self) -> Result<u64> {
|
||||
Ok(self.file_store.compute_total_size()?)
|
||||
}
|
||||
|
||||
pub fn register(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: &KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
let next_task_id = self.tasks.next_task_id(wtxn)?;
|
||||
|
||||
if let Some(uid) = task_id {
|
||||
if uid < next_task_id {
|
||||
return Err(Error::BadTaskId { received: uid, expected: next_task_id });
|
||||
}
|
||||
}
|
||||
|
||||
let mut task = Task {
|
||||
uid: task_id.unwrap_or(next_task_id),
|
||||
// The batch is defined once we starts processing the task
|
||||
batch_uid: None,
|
||||
enqueued_at: OffsetDateTime::now_utc(),
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
error: None,
|
||||
canceled_by: None,
|
||||
details: kind.default_details(),
|
||||
status: Status::Enqueued,
|
||||
kind: kind.clone(),
|
||||
};
|
||||
// For deletion and cancelation tasks, we want to make extra sure that they
|
||||
// don't attempt to delete/cancel tasks that are newer than themselves.
|
||||
filter_out_references_to_newer_tasks(&mut task);
|
||||
// If the register task is an index swap task, verify that it is well-formed
|
||||
// (that it does not contain duplicate indexes).
|
||||
check_index_swap_validity(&task)?;
|
||||
|
||||
// At this point the task is going to be registered and no further checks will be done
|
||||
if dry_run {
|
||||
return Ok(task);
|
||||
}
|
||||
|
||||
// Get rid of the mutability.
|
||||
let task = task;
|
||||
self.tasks.register(wtxn, &task)?;
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Register a task to cleanup the task queue if needed
|
||||
pub fn cleanup_task_queue(&self, wtxn: &mut RwTxn) -> Result<()> {
|
||||
let nb_tasks = self.tasks.all_task_ids(wtxn)?.len();
|
||||
// if we have less than 1M tasks everything is fine
|
||||
if nb_tasks < self.max_number_of_tasks as u64 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let finished = self.tasks.status.get(wtxn, &Status::Succeeded)?.unwrap_or_default()
|
||||
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
|
||||
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();
|
||||
|
||||
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
|
||||
|
||||
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
|
||||
// the deletion tasks we enqueued ourselves.
|
||||
if to_delete.len() < 2 {
|
||||
tracing::warn!("The task queue is almost full, but no task can be deleted yet.");
|
||||
// the only thing we can do is hope that the user tasks are going to finish
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"The task queue is almost full. Deleting the oldest {} finished tasks.",
|
||||
to_delete.len()
|
||||
);
|
||||
|
||||
// it's safe to unwrap here because we checked the len above
|
||||
let newest_task_id = to_delete.iter().last().unwrap();
|
||||
let last_task_to_delete =
|
||||
self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
// increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date.
|
||||
let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1);
|
||||
|
||||
self.register(
|
||||
wtxn,
|
||||
&KindWithContent::TaskDeletion {
|
||||
query: format!(
|
||||
"?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled",
|
||||
delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?,
|
||||
),
|
||||
tasks: to_delete,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_stats(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
|
||||
let mut res = BTreeMap::new();
|
||||
let processing_tasks = processing.processing.len();
|
||||
|
||||
res.insert(
|
||||
"statuses".to_string(),
|
||||
enum_iterator::all::<Status>()
|
||||
.map(|s| {
|
||||
let tasks = self.tasks.get_status(rtxn, s)?.len();
|
||||
match s {
|
||||
Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)),
|
||||
Status::Processing => Ok((s.to_string(), processing_tasks)),
|
||||
s => Ok((s.to_string(), tasks)),
|
||||
}
|
||||
})
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
res.insert(
|
||||
"types".to_string(),
|
||||
enum_iterator::all::<Kind>()
|
||||
.map(|s| Ok((s.to_string(), self.tasks.get_kind(rtxn, s)?.len())))
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
res.insert(
|
||||
"indexes".to_string(),
|
||||
self.tasks
|
||||
.index_tasks
|
||||
.iter(rtxn)?
|
||||
.map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?))
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
541
crates/index-scheduler/src/queue/tasks.rs
Normal file
541
crates/index-scheduler/src/queue/tasks.rs
Normal file
@@ -0,0 +1,541 @@
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, Status, Task};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{Query, Queue};
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
self, insert_task_datetime, keep_ids_within_datetimes, map_bound, remove_task_datetime,
|
||||
};
|
||||
use crate::{Error, Result, TaskId, BEI128};
|
||||
|
||||
/// The number of database used by the task queue
|
||||
const NUMBER_OF_DATABASES: u32 = 8;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const ALL_TASKS: &str = "all-tasks";
|
||||
|
||||
pub const STATUS: &str = "status";
|
||||
pub const KIND: &str = "kind";
|
||||
pub const INDEX_TASKS: &str = "index-tasks";
|
||||
pub const CANCELED_BY: &str = "canceled_by";
|
||||
pub const ENQUEUED_AT: &str = "enqueued-at";
|
||||
pub const STARTED_AT: &str = "started-at";
|
||||
pub const FINISHED_AT: &str = "finished-at";
|
||||
}
|
||||
|
||||
pub struct TaskQueue {
|
||||
/// The main database, it contains all the tasks accessible by their Id.
|
||||
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
|
||||
|
||||
/// All the tasks ids grouped by their status.
|
||||
// TODO we should not be able to serialize a `Status::Processing` in this database.
|
||||
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the tasks ids grouped by their kind.
|
||||
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the tasks associated to an index.
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
/// Store the tasks that were canceled by a task uid
|
||||
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
|
||||
/// Store the task ids of tasks which were enqueued at a specific date
|
||||
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the task ids of finished tasks which started being processed at a specific date
|
||||
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the task ids of tasks which finished at a specific date
|
||||
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
}
|
||||
|
||||
impl TaskQueue {
|
||||
pub(crate) fn private_clone(&self) -> TaskQueue {
|
||||
TaskQueue {
|
||||
all_tasks: self.all_tasks,
|
||||
status: self.status,
|
||||
kind: self.kind,
|
||||
index_tasks: self.index_tasks,
|
||||
canceled_by: self.canceled_by,
|
||||
enqueued_at: self.enqueued_at,
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub(crate) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
|
||||
Ok(Self {
|
||||
all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?,
|
||||
status: env.create_database(wtxn, Some(db_name::STATUS))?,
|
||||
kind: env.create_database(wtxn, Some(db_name::KIND))?,
|
||||
index_tasks: env.create_database(wtxn, Some(db_name::INDEX_TASKS))?,
|
||||
canceled_by: env.create_database(wtxn, Some(db_name::CANCELED_BY))?,
|
||||
enqueued_at: env.create_database(wtxn, Some(db_name::ENQUEUED_AT))?,
|
||||
started_at: env.create_database(wtxn, Some(db_name::STARTED_AT))?,
|
||||
finished_at: env.create_database(wtxn, Some(db_name::FINISHED_AT))?,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
|
||||
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
|
||||
}
|
||||
|
||||
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
|
||||
Ok(self.last_task_id(rtxn)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
|
||||
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
|
||||
}
|
||||
|
||||
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
|
||||
Ok(self.all_tasks.get(rtxn, &task_id)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
let reprocessing = old_task.status != Status::Enqueued;
|
||||
|
||||
debug_assert!(old_task != *task);
|
||||
debug_assert_eq!(old_task.uid, task.uid);
|
||||
|
||||
// If we're processing a task that failed it may already contains a batch_uid
|
||||
debug_assert!(
|
||||
reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
|
||||
"\n==> old: {old_task:?}\n==> new: {task:?}"
|
||||
);
|
||||
|
||||
if old_task.status != task.status {
|
||||
self.update_status(wtxn, old_task.status, |bitmap| {
|
||||
bitmap.remove(task.uid);
|
||||
})?;
|
||||
self.update_status(wtxn, task.status, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
if old_task.kind.as_kind() != task.kind.as_kind() {
|
||||
self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| {
|
||||
bitmap.remove(task.uid);
|
||||
})?;
|
||||
self.update_kind(wtxn, task.kind.as_kind(), |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
old_task.enqueued_at, task.enqueued_at,
|
||||
"Cannot update a task's enqueued_at time"
|
||||
);
|
||||
if old_task.started_at != task.started_at {
|
||||
assert!(
|
||||
reprocessing || old_task.started_at.is_none(),
|
||||
"Cannot update a task's started_at time"
|
||||
);
|
||||
if let Some(started_at) = old_task.started_at {
|
||||
remove_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
|
||||
}
|
||||
if let Some(started_at) = task.started_at {
|
||||
insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
|
||||
}
|
||||
}
|
||||
if old_task.finished_at != task.finished_at {
|
||||
assert!(
|
||||
reprocessing || old_task.finished_at.is_none(),
|
||||
"Cannot update a task's finished_at time"
|
||||
);
|
||||
if let Some(finished_at) = old_task.finished_at {
|
||||
remove_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.all_tasks.put(wtxn, &task.uid, task)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the whole set of tasks that belongs to this index.
|
||||
pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
|
||||
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn update_index(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
index: &str,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.index_tasks(wtxn, index)?;
|
||||
f(&mut tasks);
|
||||
if tasks.is_empty() {
|
||||
self.index_tasks.delete(wtxn, index)?;
|
||||
} else {
|
||||
self.index_tasks.put(wtxn, index, &tasks)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
|
||||
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.status.put(wtxn, &status, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_status(wtxn, status)?;
|
||||
f(&mut tasks);
|
||||
self.put_status(wtxn, status, &tasks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
|
||||
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.kind.put(wtxn, &kind, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_kind(wtxn, kind)?;
|
||||
f(&mut tasks);
|
||||
self.put_kind(wtxn, kind, &tasks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
|
||||
/// `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_tasks(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
tasks: impl IntoIterator<Item = TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|task_id| {
|
||||
self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
|
||||
pub(crate) fn register(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
self.all_tasks.put(wtxn, &task.uid, task)?;
|
||||
|
||||
for index in task.indexes() {
|
||||
self.update_index(wtxn, index, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
self.update_status(wtxn, Status::Enqueued, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
|
||||
self.update_kind(wtxn, task.kind.as_kind(), |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
|
||||
utils::insert_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, task.uid)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
/// Return the task ids matched by the given query from the index scheduler's point of view.
|
||||
pub(crate) fn get_task_ids(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
processing_tasks: &ProcessingTasks,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let ProcessingTasks { batch: processing_batch, processing: processing_tasks, progress: _ } =
|
||||
processing_tasks;
|
||||
let Query {
|
||||
limit,
|
||||
from,
|
||||
reverse,
|
||||
uids,
|
||||
batch_uids,
|
||||
statuses,
|
||||
types,
|
||||
index_uids,
|
||||
canceled_by,
|
||||
before_enqueued_at,
|
||||
after_enqueued_at,
|
||||
before_started_at,
|
||||
after_started_at,
|
||||
before_finished_at,
|
||||
after_finished_at,
|
||||
} = query;
|
||||
|
||||
let mut tasks = self.tasks.all_task_ids(rtxn)?;
|
||||
|
||||
if let Some(from) = from {
|
||||
let range = if reverse.unwrap_or_default() {
|
||||
u32::MIN..*from
|
||||
} else {
|
||||
from.saturating_add(1)..u32::MAX
|
||||
};
|
||||
tasks.remove_range(range);
|
||||
}
|
||||
|
||||
if let Some(batch_uids) = batch_uids {
|
||||
let mut batch_tasks = RoaringBitmap::new();
|
||||
for batch_uid in batch_uids {
|
||||
if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
|
||||
batch_tasks |= &**processing_tasks;
|
||||
} else {
|
||||
batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
|
||||
}
|
||||
}
|
||||
tasks &= batch_tasks;
|
||||
}
|
||||
|
||||
if let Some(status) = statuses {
|
||||
let mut status_tasks = RoaringBitmap::new();
|
||||
for status in status {
|
||||
match status {
|
||||
// special case for Processing tasks
|
||||
Status::Processing => {
|
||||
status_tasks |= &**processing_tasks;
|
||||
}
|
||||
status => status_tasks |= &self.tasks.get_status(rtxn, *status)?,
|
||||
};
|
||||
}
|
||||
if !status.contains(&Status::Processing) {
|
||||
tasks -= &**processing_tasks;
|
||||
}
|
||||
tasks &= status_tasks;
|
||||
}
|
||||
|
||||
if let Some(uids) = uids {
|
||||
let uids = RoaringBitmap::from_iter(uids);
|
||||
tasks &= &uids;
|
||||
}
|
||||
|
||||
if let Some(canceled_by) = canceled_by {
|
||||
let mut all_canceled_tasks = RoaringBitmap::new();
|
||||
for cancel_task_uid in canceled_by {
|
||||
if let Some(canceled_by_uid) = self.tasks.canceled_by.get(rtxn, cancel_task_uid)? {
|
||||
all_canceled_tasks |= canceled_by_uid;
|
||||
}
|
||||
}
|
||||
|
||||
// if the canceled_by has been specified but no task
|
||||
// matches then we prefer matching zero than all tasks.
|
||||
if all_canceled_tasks.is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
} else {
|
||||
tasks &= all_canceled_tasks;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(kind) = types {
|
||||
let mut kind_tasks = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_tasks |= self.tasks.get_kind(rtxn, *kind)?;
|
||||
}
|
||||
tasks &= &kind_tasks;
|
||||
}
|
||||
|
||||
if let Some(index) = index_uids {
|
||||
let mut index_tasks = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_tasks |= self.tasks.index_tasks(rtxn, index)?;
|
||||
}
|
||||
tasks &= &index_tasks;
|
||||
}
|
||||
|
||||
// For the started_at filter, we need to treat the part of the tasks that are processing from the part of the
|
||||
// tasks that are not processing. The non-processing ones are filtered normally while the processing ones
|
||||
// are entirely removed unless the in-memory startedAt variable falls within the date filter.
|
||||
// Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
|
||||
tasks = {
|
||||
let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
|
||||
(&tasks - &**processing_tasks, &tasks & &**processing_tasks);
|
||||
|
||||
// special case for Processing tasks
|
||||
// A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
|
||||
let mut clear_filtered_processing_tasks =
|
||||
|start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| {
|
||||
let start = map_bound(start, |b| b.unix_timestamp_nanos());
|
||||
let end = map_bound(end, |b| b.unix_timestamp_nanos());
|
||||
let is_within_dates = RangeBounds::contains(
|
||||
&(start, end),
|
||||
&processing_batch
|
||||
.as_ref()
|
||||
.map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at)
|
||||
.unix_timestamp_nanos(),
|
||||
);
|
||||
if !is_within_dates {
|
||||
filtered_processing_tasks.clear();
|
||||
}
|
||||
};
|
||||
match (after_started_at, before_started_at) {
|
||||
(None, None) => (),
|
||||
(None, Some(before)) => {
|
||||
clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(*before))
|
||||
}
|
||||
(Some(after), None) => {
|
||||
clear_filtered_processing_tasks(Bound::Excluded(*after), Bound::Unbounded)
|
||||
}
|
||||
(Some(after), Some(before)) => clear_filtered_processing_tasks(
|
||||
Bound::Excluded(*after),
|
||||
Bound::Excluded(*before),
|
||||
),
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut filtered_non_processing_tasks,
|
||||
self.tasks.started_at,
|
||||
*after_started_at,
|
||||
*before_started_at,
|
||||
)?;
|
||||
filtered_non_processing_tasks | filtered_processing_tasks
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut tasks,
|
||||
self.tasks.enqueued_at,
|
||||
*after_enqueued_at,
|
||||
*before_enqueued_at,
|
||||
)?;
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut tasks,
|
||||
self.tasks.finished_at,
|
||||
*after_finished_at,
|
||||
*before_finished_at,
|
||||
)?;
|
||||
|
||||
if let Some(limit) = limit {
|
||||
tasks = if query.reverse.unwrap_or_default() {
|
||||
tasks.into_iter().take(*limit as usize).collect()
|
||||
} else {
|
||||
tasks.into_iter().rev().take(*limit as usize).collect()
|
||||
};
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
pub(crate) fn get_task_ids_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing_tasks: &ProcessingTasks,
|
||||
) -> Result<(RoaringBitmap, u64)> {
|
||||
// compute all tasks matching the filter by ignoring the limits, to find the number of tasks matching
|
||||
// the filter.
|
||||
// As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares
|
||||
// us from modifying the underlying implementation, and the performance remains sufficient.
|
||||
// Should this change, we would modify `get_task_ids` to directly return the number of matching tasks.
|
||||
let total_tasks =
|
||||
self.get_task_ids(rtxn, &query.clone().without_limits(), processing_tasks)?;
|
||||
let mut tasks = self.get_task_ids(rtxn, query, processing_tasks)?;
|
||||
|
||||
// If the query contains a list of index uid or there is a finite list of authorized indexes,
|
||||
// then we must exclude all the kinds that aren't associated to one and only one index.
|
||||
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
|
||||
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
|
||||
tasks -= self.tasks.get_kind(rtxn, kind)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Any task that is internally associated with a non-authorized index
|
||||
// must be discarded.
|
||||
if !filters.all_indexes_authorized() {
|
||||
let all_indexes_iter = self.tasks.index_tasks.iter(rtxn)?;
|
||||
for result in all_indexes_iter {
|
||||
let (index, index_tasks) = result?;
|
||||
if !filters.is_index_authorized(index) {
|
||||
tasks -= index_tasks;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((tasks, total_tasks.len()))
|
||||
}
|
||||
|
||||
pub(crate) fn get_tasks_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing_tasks: &ProcessingTasks,
|
||||
) -> Result<(Vec<Task>, u64)> {
|
||||
let (tasks, total) =
|
||||
self.get_task_ids_from_authorized_indexes(rtxn, query, filters, processing_tasks)?;
|
||||
let tasks = if query.reverse.unwrap_or_default() {
|
||||
Box::new(tasks.into_iter()) as Box<dyn Iterator<Item = u32>>
|
||||
} else {
|
||||
Box::new(tasks.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
|
||||
};
|
||||
let tasks = self
|
||||
.tasks
|
||||
.get_existing_tasks(rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?;
|
||||
|
||||
let ProcessingTasks { batch, processing, progress: _ } = processing_tasks;
|
||||
|
||||
let ret = tasks.into_iter();
|
||||
if processing.is_empty() || batch.is_none() {
|
||||
Ok((ret.collect(), total))
|
||||
} else {
|
||||
// Safe because we ensured there was a batch in the previous branch
|
||||
let batch = batch.as_ref().unwrap();
|
||||
Ok((
|
||||
ret.map(|task| {
|
||||
if processing.contains(task.uid) {
|
||||
Task {
|
||||
status: Status::Processing,
|
||||
batch_uid: Some(batch.uid),
|
||||
started_at: Some(batch.started_at),
|
||||
..task
|
||||
}
|
||||
} else {
|
||||
task
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
total,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
441
crates/index-scheduler/src/queue/tasks_test.rs
Normal file
441
crates/index-scheduler/src/queue/tasks_test.rs
Normal file
@@ -0,0 +1,441 @@
|
||||
use meili_snap::snapshot;
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, FailureLocation};
|
||||
use crate::{IndexScheduler, Query};
|
||||
|
||||
#[test]
|
||||
fn query_tasks_from_and_limit() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = index_creation_task("doggo", "bone");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
let kind = index_creation_task("whalo", "plankton");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
let kind = index_creation_task("catto", "his_own_vomit");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
|
||||
handle.advance_n_successful_batches(3);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let processing = index_scheduler.processing_tasks.read().unwrap();
|
||||
let query = Query { limit: Some(0), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query { limit: Some(1), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query { limit: Some(2), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
|
||||
let query = Query { from: Some(2), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_tasks_simple() {
|
||||
let start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick
|
||||
|
||||
let query =
|
||||
Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test, which should excludes the enqueued tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should excludes all of them
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should exclude the enqueued tasks and include the only processing task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
|
||||
let second_start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should include all tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the second part of the test and before one minute after the
|
||||
// second start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// we run the same query to verify that, and indeed find that the last task is matched
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// enqueued, succeeded, or processing tasks started after the second part of the test, should
|
||||
// again only return the last task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
handle.advance_till([ProcessBatchFailed, AfterProcessing]);
|
||||
|
||||
// now the last task should have failed
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// so running the last query should return nothing
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![1]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with an invalid uid
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![2]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with a valid uid
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_tasks_special_rules() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
|
||||
// associated with doggo -> empty result
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query::default();
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
|
||||
// -> only the index creation of doggo should be returned
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![
|
||||
IndexUidPattern::new_unchecked("catto"),
|
||||
IndexUidPattern::new_unchecked("doggo"),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
|
||||
// -> all tasks except the swap of catto with whalo are returned
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// we asked for all the tasks with all index authorized -> all tasks returned
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_tasks_canceled_by() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
let kind = KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_string(),
|
||||
tasks: [0, 1, 2, 3].into_iter().collect(),
|
||||
};
|
||||
let task_cancelation = index_scheduler.register(kind, None, false).unwrap();
|
||||
handle.advance_n_successful_batches(1);
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
let rtxn = index_scheduler.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the
|
||||
// taskCancelation itself
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// Return only 1 because the user is not authorized to see task 2
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
|
||||
}
|
||||
398
crates/index-scheduler/src/queue/test.rs
Normal file
398
crates/index-scheduler/src/queue/test.rs
Normal file
@@ -0,0 +1,398 @@
|
||||
use big_s::S;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_types::error::ErrorCode;
|
||||
use meilisearch_types::tasks::{KindWithContent, Status};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, replace_document_import_task};
|
||||
use crate::{IndexScheduler, Query};
|
||||
|
||||
#[test]
|
||||
fn register() {
|
||||
// In this test, the handle doesn't make any progress, we only check that the tasks are registered
|
||||
let (index_scheduler, mut _handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kinds = [
|
||||
index_creation_task("catto", "mouse"),
|
||||
replace_document_import_task("catto", None, 0, 12),
|
||||
replace_document_import_task("catto", None, 1, 50),
|
||||
replace_document_import_task("doggo", Some("bone"), 2, 5000),
|
||||
];
|
||||
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap();
|
||||
file.persist().unwrap();
|
||||
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(1).unwrap();
|
||||
file.persist().unwrap();
|
||||
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(2).unwrap();
|
||||
file.persist().unwrap();
|
||||
|
||||
for (idx, kind) in kinds.into_iter().enumerate() {
|
||||
let k = kind.as_kind();
|
||||
let task = index_scheduler.register(kind, None, false).unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
assert_eq!(task.uid, idx as u32);
|
||||
assert_eq!(task.status, Status::Enqueued);
|
||||
assert_eq!(task.kind.as_kind(), k);
|
||||
}
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_successfully_registered");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dry_run() {
|
||||
let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, None, true).unwrap();
|
||||
snapshot!(task.uid, @"0");
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), @r"
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
");
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, Some(12), true).unwrap();
|
||||
snapshot!(task.uid, @"12");
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), @r"
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_set_taskid() {
|
||||
let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(task.uid, @"0");
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, Some(12), false).unwrap();
|
||||
snapshot!(task.uid, @"12");
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let error = index_scheduler.register(kind, Some(5), false).unwrap_err();
|
||||
snapshot!(error, @"Received bad task id: 5 should be >= to 13.");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_disable_auto_deletion_of_tasks() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
config.cleanup_enabled = false;
|
||||
config.max_number_of_tasks = 2;
|
||||
None
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
|
||||
// at this point the max number of tasks is reached
|
||||
// we can still enqueue multiple tasks
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
// now we're above the max number of tasks
|
||||
// and if we try to advance in the tick function no new task deletion should be enqueued
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_deletion_of_tasks() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
config.max_number_of_tasks = 2;
|
||||
None
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
|
||||
// at this point the max number of tasks is reached
|
||||
// we can still enqueue multiple tasks
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
// now we're above the max number of tasks
|
||||
// and if we try to advance in the tick function a new task deletion should be enqueued
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
// a new task deletion has been enqueued
|
||||
handle.advance_one_successful_batch();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
handle.advance_one_successful_batch();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_task_queue_is_full() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
// that's the minimum map size possible
|
||||
config.task_db_size = 1048576;
|
||||
None
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
// on average this task takes ~600 bytes
|
||||
loop {
|
||||
let result = index_scheduler.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
);
|
||||
if result.is_err() {
|
||||
break;
|
||||
}
|
||||
handle.advance_one_failed_batch();
|
||||
}
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
// at this point the task DB shoud have reached its limit and we should not be able to register new tasks
|
||||
let result = index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap_err();
|
||||
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
|
||||
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
|
||||
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
|
||||
|
||||
// Even the task deletion that doesn't delete anything shouldn't be accepted
|
||||
let result = index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap_err();
|
||||
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
|
||||
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
|
||||
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
|
||||
|
||||
// But a task deletion that delete something should works
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
// Now we should be able to enqueue a few tasks again
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
}
|
||||
474
crates/index-scheduler/src/scheduler/autobatcher.rs
Normal file
474
crates/index-scheduler/src/scheduler/autobatcher.rs
Normal file
@@ -0,0 +1,474 @@
|
||||
/*!
|
||||
The autobatcher is responsible for combining the next enqueued
|
||||
tasks affecting a single index into a [batch](crate::batch::Batch).
|
||||
|
||||
The main function of the autobatcher is [`next_autobatch`].
|
||||
*/
|
||||
|
||||
use meilisearch_types::tasks::TaskId;
|
||||
use std::ops::ControlFlow::{self, Break, Continue};
|
||||
|
||||
use crate::KindWithContent;
|
||||
|
||||
/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind)
|
||||
/// for the purpose of simplifying the implementation of the autobatcher.
|
||||
///
|
||||
/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`]
|
||||
enum AutobatchKind {
|
||||
DocumentImport { allow_index_creation: bool, primary_key: Option<String> },
|
||||
DocumentEdition,
|
||||
DocumentDeletion { by_filter: bool },
|
||||
DocumentClear,
|
||||
Settings { allow_index_creation: bool },
|
||||
IndexCreation,
|
||||
IndexDeletion,
|
||||
IndexUpdate,
|
||||
IndexSwap,
|
||||
}
|
||||
|
||||
impl AutobatchKind {
|
||||
#[rustfmt::skip]
|
||||
fn allow_index_creation(&self) -> Option<bool> {
|
||||
match self {
|
||||
AutobatchKind::DocumentImport { allow_index_creation, .. }
|
||||
| AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
AutobatchKind::DocumentImport { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<KindWithContent> for AutobatchKind {
|
||||
fn from(kind: KindWithContent) -> Self {
|
||||
match kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
allow_index_creation, primary_key, ..
|
||||
} => AutobatchKind::DocumentImport { allow_index_creation, primary_key },
|
||||
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
|
||||
KindWithContent::DocumentDeletion { .. } => {
|
||||
AutobatchKind::DocumentDeletion { by_filter: false }
|
||||
}
|
||||
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
|
||||
KindWithContent::DocumentDeletionByFilter { .. } => {
|
||||
AutobatchKind::DocumentDeletion { by_filter: true }
|
||||
}
|
||||
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
|
||||
AutobatchKind::Settings {
|
||||
allow_index_creation: allow_index_creation && !is_deletion,
|
||||
}
|
||||
}
|
||||
KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion,
|
||||
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
|
||||
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
|
||||
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BatchKind {
|
||||
DocumentClear {
|
||||
ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentOperation {
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<String>,
|
||||
operation_ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentEdition {
|
||||
id: TaskId,
|
||||
},
|
||||
DocumentDeletion {
|
||||
deletion_ids: Vec<TaskId>,
|
||||
includes_by_filter: bool,
|
||||
},
|
||||
ClearAndSettings {
|
||||
other: Vec<TaskId>,
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
Settings {
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
IndexDeletion {
|
||||
ids: Vec<TaskId>,
|
||||
},
|
||||
IndexCreation {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexUpdate {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexSwap {
|
||||
id: TaskId,
|
||||
},
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
#[rustfmt::skip]
|
||||
fn allow_index_creation(&self) -> Option<bool> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { allow_index_creation, .. }
|
||||
| BatchKind::ClearAndSettings { allow_index_creation, .. }
|
||||
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
/// Returns a `ControlFlow::Break` if you must stop right now.
|
||||
/// The boolean tell you if an index has been created by the batched task.
|
||||
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
|
||||
/// but false can't be returned if you needs to create an index.
|
||||
// TODO use an AutoBatchKind as input
|
||||
pub fn new(
|
||||
task_id: TaskId,
|
||||
kind: KindWithContent,
|
||||
primary_key: Option<&str>,
|
||||
) -> (ControlFlow<BatchKind, BatchKind>, bool) {
|
||||
use AutobatchKind as K;
|
||||
|
||||
match AutobatchKind::from(kind) {
|
||||
K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true),
|
||||
K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false),
|
||||
K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false),
|
||||
K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false),
|
||||
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
|
||||
K::DocumentImport { allow_index_creation, primary_key: pk }
|
||||
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
|
||||
{
|
||||
(
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key: pk,
|
||||
operation_ids: vec![task_id],
|
||||
}),
|
||||
allow_index_creation,
|
||||
)
|
||||
}
|
||||
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
|
||||
K::DocumentImport { allow_index_creation, primary_key } => (
|
||||
Break(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: vec![task_id],
|
||||
}),
|
||||
allow_index_creation,
|
||||
),
|
||||
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
|
||||
K::DocumentDeletion { by_filter: includes_by_filter } => (
|
||||
Continue(BatchKind::DocumentDeletion {
|
||||
deletion_ids: vec![task_id],
|
||||
includes_by_filter,
|
||||
}),
|
||||
false,
|
||||
),
|
||||
K::Settings { allow_index_creation } => (
|
||||
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
|
||||
allow_index_creation,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a `ControlFlow::Break` if you must stop right now.
|
||||
/// The boolean tell you if an index has been created by the batched task.
|
||||
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
|
||||
/// but false can't be returned if you needs to create an index.
|
||||
#[rustfmt::skip]
|
||||
fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow<BatchKind, BatchKind> {
|
||||
use AutobatchKind as K;
|
||||
|
||||
match (self, kind) {
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this),
|
||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
|
||||
Break(this)
|
||||
},
|
||||
// NOTE: We need to negate the whole condition since we're checking if we need to break instead of continue.
|
||||
// I wrote it this way because it's easier to understand than the other way around.
|
||||
(this, kind) if !(
|
||||
// 1. If both task don't interact with primary key -> we can continue
|
||||
(this.primary_key().is_none() && kind.primary_key().is_none()) ||
|
||||
// 2. Else ->
|
||||
(
|
||||
// 2.1 If we already have a primary-key ->
|
||||
(
|
||||
primary_key.is_some() &&
|
||||
// 2.1.1 If the task we're trying to accumulate have a pk it must be equal to our primary key
|
||||
// 2.1.2 If the task don't have a primary-key -> we can continue
|
||||
kind.primary_key().map_or(true, |pk| pk == primary_key)
|
||||
) ||
|
||||
// 2.2 If we don't have a primary-key ->
|
||||
(
|
||||
// 2.2.1 If both the batch and the task have a primary key they should be equal
|
||||
// 2.2.2 If the batch is set to Some(None), the task should be too
|
||||
// 2.2.3 If the batch is set to None -> we can continue
|
||||
this.primary_key().zip(kind.primary_key()).map_or(true, |(this, kind)| this == kind)
|
||||
)
|
||||
)
|
||||
|
||||
) // closing the negation
|
||||
|
||||
=> {
|
||||
Break(this)
|
||||
},
|
||||
// The index deletion can batch with everything but must stop after
|
||||
(
|
||||
BatchKind::DocumentClear { mut ids }
|
||||
| BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ }
|
||||
| BatchKind::DocumentOperation { allow_index_creation: _, primary_key: _, operation_ids: mut ids }
|
||||
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
ids.append(&mut other);
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::DocumentClear { mut ids },
|
||||
K::DocumentClear | K::DocumentDeletion { by_filter: _ },
|
||||
) => {
|
||||
ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids })
|
||||
}
|
||||
(
|
||||
this @ BatchKind::DocumentClear { .. },
|
||||
K::DocumentImport { .. } | K::Settings { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::DocumentOperation { allow_index_creation: _, primary_key: _, mut operation_ids },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: operation_ids })
|
||||
}
|
||||
|
||||
// we can autobatch different kind of document operations and mix replacements with updates
|
||||
(
|
||||
BatchKind::DocumentOperation { allow_index_creation, primary_key: _, mut operation_ids },
|
||||
K::DocumentImport { primary_key: pk, .. },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
operation_ids,
|
||||
primary_key: pk,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::DocumentOperation { allow_index_creation, primary_key, mut operation_ids },
|
||||
K::DocumentDeletion { by_filter: false },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
// We can't batch a document operation with a delete by filter
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::DocumentDeletion { by_filter: true },
|
||||
) => {
|
||||
Break(this)
|
||||
}
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::Settings { .. },
|
||||
) => Break(this),
|
||||
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: deletion_ids })
|
||||
}
|
||||
// we can't autobatch the deletion and import if the document deletion contained a filter
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true },
|
||||
K::DocumentImport { .. }
|
||||
) => Break(this),
|
||||
// we can autobatch the deletion and import if the index already exists
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
|
||||
K::DocumentImport { allow_index_creation, primary_key }
|
||||
) if index_already_exists => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can autobatch the deletion and import if both can't create an index
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
|
||||
K::DocumentImport { allow_index_creation, primary_key }
|
||||
) if !allow_index_creation => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { .. },
|
||||
K::DocumentImport { .. }
|
||||
) => {
|
||||
Break(this)
|
||||
}
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter })
|
||||
}
|
||||
(this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this),
|
||||
|
||||
(
|
||||
BatchKind::Settings { settings_ids, allow_index_creation },
|
||||
K::DocumentClear,
|
||||
) => Continue(BatchKind::ClearAndSettings {
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
other: vec![id],
|
||||
}),
|
||||
(
|
||||
this @ BatchKind::Settings { .. },
|
||||
K::DocumentImport { .. } | K::DocumentDeletion { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::Settings { mut settings_ids, allow_index_creation },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::Settings {
|
||||
allow_index_creation,
|
||||
settings_ids,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
other.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this),
|
||||
(
|
||||
BatchKind::ClearAndSettings {
|
||||
mut other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
},
|
||||
K::DocumentDeletion { .. },
|
||||
) => {
|
||||
other.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::IndexCreation { .. }
|
||||
| BatchKind::IndexDeletion { .. }
|
||||
| BatchKind::IndexUpdate { .. }
|
||||
| BatchKind::IndexSwap { .. }
|
||||
| BatchKind::DocumentEdition { .. },
|
||||
_,
|
||||
) => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a batch from an ordered list of tasks.
|
||||
///
|
||||
/// ## Preconditions
|
||||
/// 1. The tasks must be enqueued and given in the order in which they were enqueued
|
||||
/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion)
|
||||
/// 3. The tasks must all be related to the same index
|
||||
///
|
||||
/// ## Return
|
||||
/// `None` if the list of tasks is empty. Otherwise, an [`AutoBatch`] that represents
|
||||
/// a subset of the given tasks.
|
||||
pub fn autobatch(
|
||||
enqueued: Vec<(TaskId, KindWithContent)>,
|
||||
index_already_exists: bool,
|
||||
primary_key: Option<&str>,
|
||||
) -> Option<(BatchKind, bool)> {
|
||||
let mut enqueued = enqueued.into_iter();
|
||||
let (id, kind) = enqueued.next()?;
|
||||
|
||||
// index_exist will keep track of if the index should exist at this point after the tasks we batched.
|
||||
let mut index_exist = index_already_exists;
|
||||
|
||||
let (mut acc, must_create_index) = match BatchKind::new(id, kind, primary_key) {
|
||||
(Continue(acc), create) => (acc, create),
|
||||
(Break(acc), create) => return Some((acc, create)),
|
||||
};
|
||||
|
||||
// if an index has been created in the previous step we can consider it as existing.
|
||||
index_exist |= must_create_index;
|
||||
|
||||
for (id, kind) in enqueued {
|
||||
acc = match acc.accumulate(id, kind.into(), index_exist, primary_key) {
|
||||
Continue(acc) => acc,
|
||||
Break(acc) => return Some((acc, must_create_index)),
|
||||
};
|
||||
}
|
||||
|
||||
Some((acc, must_create_index))
|
||||
}
|
||||
395
crates/index-scheduler/src/scheduler/autobatcher_test.rs
Normal file
395
crates/index-scheduler/src/scheduler/autobatcher_test.rs
Normal file
@@ -0,0 +1,395 @@
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::{
|
||||
self, ReplaceDocuments, UpdateDocuments,
|
||||
};
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
|
||||
use uuid::Uuid;
|
||||
|
||||
use self::autobatcher::{autobatch, BatchKind};
|
||||
use super::*;
|
||||
use crate::TaskId;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! debug_snapshot {
|
||||
($value:expr, @$snapshot:literal) => {{
|
||||
let value = format!("{:?}", $value);
|
||||
meili_snap::snapshot!(value, @$snapshot);
|
||||
}};
|
||||
}
|
||||
|
||||
fn autobatch_from(
|
||||
index_already_exists: bool,
|
||||
primary_key: Option<&str>,
|
||||
input: impl IntoIterator<Item = KindWithContent>,
|
||||
) -> Option<(BatchKind, bool)> {
|
||||
autobatch(
|
||||
input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(),
|
||||
index_already_exists,
|
||||
primary_key,
|
||||
)
|
||||
}
|
||||
|
||||
fn doc_imp(
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<&str>,
|
||||
) -> KindWithContent {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: String::from("doggo"),
|
||||
primary_key: primary_key.map(|pk| pk.to_string()),
|
||||
method,
|
||||
content_file: Uuid::new_v4(),
|
||||
documents_count: 0,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_del() -> KindWithContent {
|
||||
KindWithContent::DocumentDeletion {
|
||||
index_uid: String::from("doggo"),
|
||||
documents_ids: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_del_fil() -> KindWithContent {
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
index_uid: String::from("doggo"),
|
||||
filter_expr: serde_json::json!("cuteness > 100"),
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_clr() -> KindWithContent {
|
||||
KindWithContent::DocumentClear { index_uid: String::from("doggo") }
|
||||
}
|
||||
|
||||
fn settings(allow_index_creation: bool) -> KindWithContent {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: String::from("doggo"),
|
||||
new_settings: Default::default(),
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
|
||||
fn idx_create() -> KindWithContent {
|
||||
KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None }
|
||||
}
|
||||
|
||||
fn idx_update() -> KindWithContent {
|
||||
KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None }
|
||||
}
|
||||
|
||||
fn idx_del() -> KindWithContent {
|
||||
KindWithContent::IndexDeletion { index_uid: String::from("doggo") }
|
||||
}
|
||||
|
||||
fn idx_swap() -> KindWithContent {
|
||||
KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn autobatch_simple_operation_together() {
|
||||
// we can autobatch one or multiple `ReplaceDocuments` together.
|
||||
// if the index exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// if it doesn't exists.
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
// we can autobatch one or multiple `UpdateDocuments` together.
|
||||
// if the index exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// if it doesn't exists.
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// we can autobatch one or multiple DocumentDeletion together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
|
||||
|
||||
// we can autobatch one or multiple DocumentDeletionByFilter together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
|
||||
|
||||
// we can autobatch one or multiple Settings together
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// We can autobatch document addition with document deletion
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
|
||||
// But we can't autobatch document addition with document deletion by filter
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_different_document_operations_autobatch_together() {
|
||||
// addition and updates with deletion by filter can't batch together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn document_addition_doesnt_batch_with_settings() {
|
||||
// simple case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// multiple settings and doc addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
|
||||
// addition and setting unordered
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// Doesn't batch with other forbidden operations
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn clear_and_additions() {
|
||||
// these two doesn't need to batch
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
|
||||
// Basic use case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
|
||||
// This batch kind doesn't mix with other document addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
|
||||
// But you can batch multiple clear together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn clear_and_additions_and_settings() {
|
||||
// A clear don't need to autobatch the settings that happens AFTER there is no documents
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn anything_and_index_deletion() {
|
||||
// The `IndexDeletion` doesn't batch with anything that happens AFTER.
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
|
||||
// The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`.
|
||||
// First, the basic cases
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allowed_and_disallowed_index_creation() {
|
||||
// `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
// batch deletion and addition
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn autobatch_primary_key() {
|
||||
// ==> If I have a pk
|
||||
// With a single update
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// With a multiple updates
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// ==> If I don't have a pk
|
||||
// With a single update
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// With a multiple updates
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
}
|
||||
567
crates/index-scheduler/src/scheduler/create_batch.rs
Normal file
567
crates/index-scheduler/src/scheduler/create_batch.rs
Normal file
@@ -0,0 +1,567 @@
|
||||
use std::fmt;
|
||||
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::autobatcher::{self, BatchKind};
|
||||
use crate::utils::ProcessingBatch;
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
/// Represents a combination of tasks that can all be processed at the same time.
|
||||
///
|
||||
/// A batch contains the set of tasks that it represents (accessible through
|
||||
/// [`self.ids()`](Batch::ids)), as well as additional information on how to
|
||||
/// be processed.
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum Batch {
|
||||
TaskCancelation {
|
||||
/// The task cancelation itself.
|
||||
task: Task,
|
||||
},
|
||||
TaskDeletions(Vec<Task>),
|
||||
SnapshotCreation(Vec<Task>),
|
||||
Dump(Task),
|
||||
IndexOperation {
|
||||
op: IndexOperation,
|
||||
must_create_index: bool,
|
||||
},
|
||||
IndexCreation {
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
task: Task,
|
||||
},
|
||||
IndexUpdate {
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
task: Task,
|
||||
},
|
||||
IndexDeletion {
|
||||
index_uid: String,
|
||||
tasks: Vec<Task>,
|
||||
index_has_been_created: bool,
|
||||
},
|
||||
IndexSwap {
|
||||
task: Task,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum DocumentOperation {
|
||||
Replace(Uuid),
|
||||
Update(Uuid),
|
||||
Delete(Vec<String>),
|
||||
}
|
||||
|
||||
/// A [batch](Batch) that combines multiple tasks operating on an index.
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum IndexOperation {
|
||||
DocumentOperation {
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
operations: Vec<DocumentOperation>,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
DocumentEdition {
|
||||
index_uid: String,
|
||||
task: Task,
|
||||
},
|
||||
DocumentDeletion {
|
||||
index_uid: String,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
DocumentClear {
|
||||
index_uid: String,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
Settings {
|
||||
index_uid: String,
|
||||
// The boolean indicates if it's a settings deletion or creation.
|
||||
settings: Vec<(bool, Settings<Unchecked>)>,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
DocumentClearAndSetting {
|
||||
index_uid: String,
|
||||
cleared_tasks: Vec<Task>,
|
||||
|
||||
// The boolean indicates if it's a settings deletion or creation.
|
||||
settings: Vec<(bool, Settings<Unchecked>)>,
|
||||
settings_tasks: Vec<Task>,
|
||||
},
|
||||
}
|
||||
|
||||
impl Batch {
|
||||
/// Return the task ids associated with this batch.
|
||||
pub fn ids(&self) -> RoaringBitmap {
|
||||
match self {
|
||||
Batch::TaskCancelation { task, .. }
|
||||
| Batch::Dump(task)
|
||||
| Batch::IndexCreation { task, .. }
|
||||
| Batch::IndexUpdate { task, .. } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
Batch::SnapshotCreation(tasks)
|
||||
| Batch::TaskDeletions(tasks)
|
||||
| Batch::UpgradeDatabase { tasks }
|
||||
| Batch::IndexDeletion { tasks, .. } => {
|
||||
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
|
||||
}
|
||||
Batch::IndexOperation { op, .. } => match op {
|
||||
IndexOperation::DocumentOperation { tasks, .. }
|
||||
| IndexOperation::Settings { tasks, .. }
|
||||
| IndexOperation::DocumentDeletion { tasks, .. }
|
||||
| IndexOperation::DocumentClear { tasks, .. } => {
|
||||
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
|
||||
}
|
||||
IndexOperation::DocumentEdition { task, .. } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
IndexOperation::DocumentClearAndSetting {
|
||||
cleared_tasks: tasks,
|
||||
settings_tasks: other,
|
||||
..
|
||||
} => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
|
||||
},
|
||||
Batch::IndexSwap { task } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the index UID associated with this batch
|
||||
pub fn index_uid(&self) -> Option<&str> {
|
||||
use Batch::*;
|
||||
match self {
|
||||
TaskCancelation { .. }
|
||||
| TaskDeletions(_)
|
||||
| SnapshotCreation(_)
|
||||
| Dump(_)
|
||||
| UpgradeDatabase { .. }
|
||||
| IndexSwap { .. } => None,
|
||||
IndexOperation { op, .. } => Some(op.index_uid()),
|
||||
IndexCreation { index_uid, .. }
|
||||
| IndexUpdate { index_uid, .. }
|
||||
| IndexDeletion { index_uid, .. } => Some(index_uid),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Batch {
|
||||
/// A text used when we debug the profiling reports.
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let index_uid = self.index_uid();
|
||||
let tasks = self.ids();
|
||||
match self {
|
||||
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
|
||||
Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
|
||||
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
|
||||
Batch::Dump(_) => f.write_str("Dump")?,
|
||||
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
|
||||
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
|
||||
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
|
||||
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
|
||||
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
|
||||
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
|
||||
};
|
||||
match index_uid {
|
||||
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
|
||||
None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexOperation {
|
||||
pub fn index_uid(&self) -> &str {
|
||||
match self {
|
||||
IndexOperation::DocumentOperation { index_uid, .. }
|
||||
| IndexOperation::DocumentEdition { index_uid, .. }
|
||||
| IndexOperation::DocumentDeletion { index_uid, .. }
|
||||
| IndexOperation::DocumentClear { index_uid, .. }
|
||||
| IndexOperation::Settings { index_uid, .. }
|
||||
| IndexOperation::DocumentClearAndSetting { index_uid, .. } => index_uid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for IndexOperation {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
IndexOperation::DocumentOperation { .. } => {
|
||||
f.write_str("IndexOperation::DocumentOperation")
|
||||
}
|
||||
IndexOperation::DocumentEdition { .. } => {
|
||||
f.write_str("IndexOperation::DocumentEdition")
|
||||
}
|
||||
IndexOperation::DocumentDeletion { .. } => {
|
||||
f.write_str("IndexOperation::DocumentDeletion")
|
||||
}
|
||||
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
|
||||
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
|
||||
IndexOperation::DocumentClearAndSetting { .. } => {
|
||||
f.write_str("IndexOperation::DocumentClearAndSetting")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
|
||||
///
|
||||
/// ## Arguments
|
||||
/// - `rtxn`: read transaction
|
||||
/// - `index_uid`: name of the index affected by the operations of the autobatch
|
||||
/// - `batch`: the result of the autobatcher
|
||||
pub(crate) fn create_next_batch_index(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
index_uid: String,
|
||||
batch: BatchKind,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
must_create_index: bool,
|
||||
) -> Result<Option<Batch>> {
|
||||
match batch {
|
||||
BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentClear {
|
||||
tasks: self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
ids,
|
||||
)?,
|
||||
index_uid,
|
||||
},
|
||||
must_create_index,
|
||||
})),
|
||||
BatchKind::DocumentEdition { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
match &task.kind {
|
||||
KindWithContent::DocumentEdition { index_uid, .. } => {
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentEdition {
|
||||
index_uid: index_uid.clone(),
|
||||
task,
|
||||
},
|
||||
must_create_index: false,
|
||||
}))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
BatchKind::DocumentOperation { operation_ids, .. } => {
|
||||
let tasks = self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
operation_ids,
|
||||
)?;
|
||||
let primary_key = tasks
|
||||
.iter()
|
||||
.find_map(|task| match task.kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => {
|
||||
// we want to stop on the first document addition
|
||||
Some(primary_key.clone())
|
||||
}
|
||||
KindWithContent::DocumentDeletion { .. } => None,
|
||||
_ => unreachable!(),
|
||||
})
|
||||
.flatten();
|
||||
|
||||
let mut operations = Vec::new();
|
||||
|
||||
for task in tasks.iter() {
|
||||
match task.kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
content_file, method, ..
|
||||
} => match method {
|
||||
IndexDocumentsMethod::ReplaceDocuments => {
|
||||
operations.push(DocumentOperation::Replace(content_file))
|
||||
}
|
||||
IndexDocumentsMethod::UpdateDocuments => {
|
||||
operations.push(DocumentOperation::Update(content_file))
|
||||
}
|
||||
_ => unreachable!("Unknown document merging method"),
|
||||
},
|
||||
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
|
||||
operations.push(DocumentOperation::Delete(documents_ids.clone()));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentOperation {
|
||||
index_uid,
|
||||
primary_key,
|
||||
operations,
|
||||
tasks,
|
||||
},
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => {
|
||||
let tasks = self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
deletion_ids,
|
||||
)?;
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentDeletion { index_uid, tasks },
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::Settings { settings_ids, .. } => {
|
||||
let tasks = self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
settings_ids,
|
||||
)?;
|
||||
|
||||
let mut settings = Vec::new();
|
||||
for task in &tasks {
|
||||
match task.kind {
|
||||
KindWithContent::SettingsUpdate {
|
||||
ref new_settings, is_deletion, ..
|
||||
} => settings.push((is_deletion, *new_settings.clone())),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::Settings { index_uid, settings, tasks },
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => {
|
||||
let (index_uid, settings, settings_tasks) = match self
|
||||
.create_next_batch_index(
|
||||
rtxn,
|
||||
index_uid,
|
||||
BatchKind::Settings { settings_ids, allow_index_creation },
|
||||
current_batch,
|
||||
must_create_index,
|
||||
)?
|
||||
.unwrap()
|
||||
{
|
||||
Batch::IndexOperation {
|
||||
op: IndexOperation::Settings { index_uid, settings, tasks, .. },
|
||||
..
|
||||
} => (index_uid, settings, tasks),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let (index_uid, cleared_tasks) = match self
|
||||
.create_next_batch_index(
|
||||
rtxn,
|
||||
index_uid,
|
||||
BatchKind::DocumentClear { ids: other },
|
||||
current_batch,
|
||||
must_create_index,
|
||||
)?
|
||||
.unwrap()
|
||||
{
|
||||
Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentClear { index_uid, tasks },
|
||||
..
|
||||
} => (index_uid, tasks),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentClearAndSetting {
|
||||
index_uid,
|
||||
cleared_tasks,
|
||||
settings,
|
||||
settings_tasks,
|
||||
},
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::IndexCreation { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
let (index_uid, primary_key) = match &task.kind {
|
||||
KindWithContent::IndexCreation { index_uid, primary_key } => {
|
||||
(index_uid.clone(), primary_key.clone())
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Ok(Some(Batch::IndexCreation { index_uid, primary_key, task }))
|
||||
}
|
||||
BatchKind::IndexUpdate { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
let primary_key = match &task.kind {
|
||||
KindWithContent::IndexUpdate { primary_key, .. } => primary_key.clone(),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task }))
|
||||
}
|
||||
BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion {
|
||||
index_uid,
|
||||
index_has_been_created: must_create_index,
|
||||
tasks: self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
ids,
|
||||
)?,
|
||||
})),
|
||||
BatchKind::IndexSwap { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
Ok(Some(Batch::IndexSwap { task }))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create the next batch to be processed;
|
||||
/// 1. We get the *last* task to cancel.
|
||||
/// 2. We get the *next* task to delete.
|
||||
/// 3. We get the *next* snapshot to process.
|
||||
/// 4. We get the *next* dump to process.
|
||||
/// 5. We get the *next* tasks to process for a specific index.
|
||||
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
|
||||
pub(crate) fn create_next_batch(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
) -> Result<Option<(Batch, ProcessingBatch)>> {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;
|
||||
|
||||
let batch_id = self.queue.batches.next_batch_id(rtxn)?;
|
||||
let mut current_batch = ProcessingBatch::new(batch_id);
|
||||
|
||||
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
|
||||
let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?;
|
||||
|
||||
// 0. The priority over everything is to upgrade the instance
|
||||
// There shouldn't be multiple upgrade tasks but just in case we're going to batch all of them at the same time
|
||||
let upgrade = self.queue.tasks.get_kind(rtxn, Kind::UpgradeDatabase)? & (enqueued | failed);
|
||||
if !upgrade.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, upgrade)?;
|
||||
// In the case of an upgrade database batch, we want to find back the original batch that tried processing it
|
||||
// and re-use its id
|
||||
if let Some(batch_uid) = tasks.last().unwrap().batch_uid {
|
||||
current_batch.uid = batch_uid;
|
||||
}
|
||||
current_batch.processing(&mut tasks);
|
||||
return Ok(Some((Batch::UpgradeDatabase { tasks }, current_batch)));
|
||||
}
|
||||
|
||||
// 1. we get the last task to cancel.
|
||||
let to_cancel = self.queue.tasks.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
|
||||
if let Some(task_id) = to_cancel.max() {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
return Ok(Some((Batch::TaskCancelation { task }, current_batch)));
|
||||
}
|
||||
|
||||
// 2. we get the next task to delete
|
||||
let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
|
||||
if !to_delete.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?;
|
||||
current_batch.processing(&mut tasks);
|
||||
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 3. we batch the snapshot.
|
||||
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
|
||||
if !to_snapshot.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
|
||||
current_batch.processing(&mut tasks);
|
||||
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the dumps.
|
||||
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
|
||||
if let Some(to_dump) = to_dump.min() {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
return Ok(Some((Batch::Dump(task), current_batch)));
|
||||
}
|
||||
|
||||
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
// If the task is not associated with any index, verify that it is an index swap and
|
||||
// create the batch directly. Otherwise, get the index name associated with the task
|
||||
// and use the autobatcher to batch the enqueued tasks associated with it
|
||||
|
||||
let index_name = if let Some(&index_name) = task.indexes().first() {
|
||||
index_name
|
||||
} else {
|
||||
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
|
||||
current_batch.processing(Some(&mut task));
|
||||
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
|
||||
};
|
||||
|
||||
let index_already_exists = self.index_mapper.exists(rtxn, index_name)?;
|
||||
let mut primary_key = None;
|
||||
if index_already_exists {
|
||||
let index = self.index_mapper.index(rtxn, index_name)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
}
|
||||
|
||||
let index_tasks = self.queue.tasks.index_tasks(rtxn, index_name)? & enqueued;
|
||||
|
||||
// If autobatching is disabled we only take one task at a time.
|
||||
// Otherwise, we take only a maximum of tasks to create batches.
|
||||
let tasks_limit = if self.scheduler.autobatching_enabled {
|
||||
self.scheduler.max_number_of_batched_tasks
|
||||
} else {
|
||||
1
|
||||
};
|
||||
|
||||
let mut enqueued = Vec::new();
|
||||
let mut total_size: u64 = 0;
|
||||
for task_id in index_tasks.into_iter().take(tasks_limit) {
|
||||
let task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(rtxn, task_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
|
||||
|
||||
if let Some(uuid) = task.content_uuid() {
|
||||
let content_size = self.queue.file_store.compute_size(uuid)?;
|
||||
total_size = total_size.saturating_add(content_size);
|
||||
}
|
||||
|
||||
if total_size > self.scheduler.batched_tasks_size_limit && !enqueued.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
enqueued.push((task.uid, task.kind));
|
||||
}
|
||||
|
||||
if let Some((batchkind, create_index)) =
|
||||
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
|
||||
{
|
||||
return Ok(self
|
||||
.create_next_batch_index(
|
||||
rtxn,
|
||||
index_name.to_string(),
|
||||
batchkind,
|
||||
&mut current_batch,
|
||||
create_index,
|
||||
)?
|
||||
.map(|batch| (batch, current_batch)));
|
||||
}
|
||||
|
||||
// If we found no tasks then we were notified for something that got autobatched
|
||||
// somehow and there is nothing to do.
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
361
crates/index-scheduler/src/scheduler/mod.rs
Normal file
361
crates/index-scheduler/src/scheduler/mod.rs
Normal file
@@ -0,0 +1,361 @@
|
||||
mod autobatcher;
|
||||
#[cfg(test)]
|
||||
mod autobatcher_test;
|
||||
mod create_batch;
|
||||
mod process_batch;
|
||||
mod process_dump_creation;
|
||||
mod process_index_operation;
|
||||
mod process_snapshot_creation;
|
||||
mod process_upgrade;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
#[cfg(test)]
|
||||
mod test_document_addition;
|
||||
#[cfg(test)]
|
||||
mod test_embedders;
|
||||
#[cfg(test)]
|
||||
mod test_failure;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::Status;
|
||||
use rayon::current_num_threads;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use roaring::RoaringBitmap;
|
||||
use synchronoise::SignalEvent;
|
||||
|
||||
use crate::processing::{AtomicTaskStep, BatchProgress};
|
||||
use crate::{Error, IndexScheduler, IndexSchedulerOptions, Result, TickOutcome};
|
||||
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct MustStopProcessing(Arc<AtomicBool>);
|
||||
|
||||
impl MustStopProcessing {
|
||||
pub fn get(&self) -> bool {
|
||||
self.0.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn must_stop(&self) {
|
||||
self.0.store(true, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn reset(&self) {
|
||||
self.0.store(false, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Scheduler {
|
||||
/// A boolean that can be set to true to stop the currently processing tasks.
|
||||
pub must_stop_processing: MustStopProcessing,
|
||||
|
||||
/// Get a signal when a batch needs to be processed.
|
||||
pub(crate) wake_up: Arc<SignalEvent>,
|
||||
|
||||
/// Whether auto-batching is enabled or not.
|
||||
pub(crate) autobatching_enabled: bool,
|
||||
|
||||
/// The maximum number of tasks that will be batched together.
|
||||
pub(crate) max_number_of_batched_tasks: usize,
|
||||
|
||||
/// The maximum size, in bytes, of tasks in a batch.
|
||||
pub(crate) batched_tasks_size_limit: u64,
|
||||
|
||||
/// The path used to create the dumps.
|
||||
pub(crate) dumps_path: PathBuf,
|
||||
|
||||
/// The path used to create the snapshots.
|
||||
pub(crate) snapshots_path: PathBuf,
|
||||
|
||||
/// The path to the folder containing the auth LMDB env.
|
||||
pub(crate) auth_path: PathBuf,
|
||||
|
||||
/// The path to the version file of Meilisearch.
|
||||
pub(crate) version_file_path: PathBuf,
|
||||
}
|
||||
|
||||
impl Scheduler {
|
||||
pub(crate) fn private_clone(&self) -> Scheduler {
|
||||
Scheduler {
|
||||
must_stop_processing: self.must_stop_processing.clone(),
|
||||
wake_up: self.wake_up.clone(),
|
||||
autobatching_enabled: self.autobatching_enabled,
|
||||
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: self.batched_tasks_size_limit,
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
snapshots_path: self.snapshots_path.clone(),
|
||||
auth_path: self.auth_path.clone(),
|
||||
version_file_path: self.version_file_path.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(options: &IndexSchedulerOptions) -> Scheduler {
|
||||
Scheduler {
|
||||
must_stop_processing: MustStopProcessing::default(),
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||
autobatching_enabled: options.autobatching_enabled,
|
||||
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: options.batched_tasks_size_limit,
|
||||
dumps_path: options.dumps_path.clone(),
|
||||
snapshots_path: options.snapshots_path.clone(),
|
||||
auth_path: options.auth_path.clone(),
|
||||
version_file_path: options.version_file_path.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Perform one iteration of the run loop.
|
||||
///
|
||||
/// 1. See if we need to cleanup the task queue
|
||||
/// 2. Find the next batch of tasks to be processed.
|
||||
/// 3. Update the information of these tasks following the start of their processing.
|
||||
/// 4. Update the in-memory list of processed tasks accordingly.
|
||||
/// 5. Process the batch:
|
||||
/// - perform the actions of each batched task
|
||||
/// - update the information of each batched task following the end
|
||||
/// of their processing.
|
||||
/// 6. Reset the in-memory list of processed tasks.
|
||||
///
|
||||
/// Returns the number of processed tasks.
|
||||
pub(crate) fn tick(&self) -> Result<TickOutcome> {
|
||||
#[cfg(test)]
|
||||
{
|
||||
*self.run_loop_iteration.write().unwrap() += 1;
|
||||
self.breakpoint(crate::test_utils::Breakpoint::Start);
|
||||
}
|
||||
|
||||
if self.cleanup_enabled {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.queue.cleanup_task_queue(&mut wtxn)?;
|
||||
wtxn.commit()?;
|
||||
}
|
||||
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
let (batch, mut processing_batch) =
|
||||
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
|
||||
Some(batch) => batch,
|
||||
None => return Ok(TickOutcome::WaitForSignal),
|
||||
};
|
||||
let index_uid = batch.index_uid().map(ToOwned::to_owned);
|
||||
drop(rtxn);
|
||||
|
||||
// 1. store the starting date with the bitmap of processing tasks.
|
||||
let mut ids = batch.ids();
|
||||
let processed_tasks = ids.len();
|
||||
|
||||
// We reset the must_stop flag to be sure that we don't stop processing tasks
|
||||
self.scheduler.must_stop_processing.reset();
|
||||
let progress = self
|
||||
.processing_tasks
|
||||
.write()
|
||||
.unwrap()
|
||||
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
|
||||
.start_processing(processing_batch.clone(), ids.clone());
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::BatchCreated);
|
||||
|
||||
// 2. Process the tasks
|
||||
let res = {
|
||||
let cloned_index_scheduler = self.private_clone();
|
||||
let processing_batch = &mut processing_batch;
|
||||
let progress = progress.clone();
|
||||
std::thread::scope(|s| {
|
||||
let handle = std::thread::Builder::new()
|
||||
.name(String::from("batch-operation"))
|
||||
.spawn_scoped(s, move || {
|
||||
cloned_index_scheduler.process_batch(batch, processing_batch, progress)
|
||||
})
|
||||
.unwrap();
|
||||
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
|
||||
})
|
||||
};
|
||||
|
||||
// Reset the currently updating index to relinquish the index handle
|
||||
self.index_mapper.set_currently_updating_index(None);
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?;
|
||||
|
||||
progress.update_progress(BatchProgress::WritingTasksToDisk);
|
||||
processing_batch.finished();
|
||||
let mut stop_scheduler_forever = false;
|
||||
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
||||
let mut canceled = RoaringBitmap::new();
|
||||
|
||||
match res {
|
||||
Ok(tasks) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);
|
||||
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
|
||||
progress.update_progress(task_progress_obj);
|
||||
let mut success = 0;
|
||||
let mut failure = 0;
|
||||
let mut canceled_by = None;
|
||||
|
||||
#[allow(unused_variables)]
|
||||
for (i, mut task) in tasks.into_iter().enumerate() {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
processing_batch.update(&mut task);
|
||||
if task.status == Status::Canceled {
|
||||
canceled.insert(task.uid);
|
||||
canceled_by = task.canceled_by;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(
|
||||
crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchSuccess {
|
||||
task_uid: i as u32,
|
||||
},
|
||||
)?;
|
||||
|
||||
match task.error {
|
||||
Some(_) => failure += 1,
|
||||
None => success += 1,
|
||||
}
|
||||
|
||||
self.queue
|
||||
.tasks
|
||||
.update_task(&mut wtxn, &task)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
|
||||
}
|
||||
if let Some(canceled_by) = canceled_by {
|
||||
self.queue.tasks.canceled_by.put(&mut wtxn, &canceled_by, &canceled)?;
|
||||
}
|
||||
tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks.");
|
||||
}
|
||||
// If we have an abortion error we must stop the tick here and re-schedule tasks.
|
||||
Err(Error::Milli {
|
||||
error: milli::Error::InternalError(milli::InternalError::AbortedIndexation),
|
||||
..
|
||||
})
|
||||
| Err(Error::AbortedTask) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::AbortedIndexation);
|
||||
wtxn.abort();
|
||||
|
||||
tracing::info!("A batch of tasks was aborted.");
|
||||
// We make sure that we don't call `stop_processing` on the `processing_tasks`,
|
||||
// this is because we want to let the next tick call `create_next_batch` and keep
|
||||
// the `started_at` date times and `processings` of the current processing tasks.
|
||||
// This date time is used by the task cancelation to store the right `started_at`
|
||||
// date in the task on disk.
|
||||
return Ok(TickOutcome::TickAgain(0));
|
||||
}
|
||||
// If an index said it was full, we need to:
|
||||
// 1. identify which index is full
|
||||
// 2. close the associated environment
|
||||
// 3. resize it
|
||||
// 4. re-schedule tasks
|
||||
Err(Error::Milli {
|
||||
error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached),
|
||||
..
|
||||
}) if index_uid.is_some() => {
|
||||
// fixme: add index_uid to match to avoid the unwrap
|
||||
let index_uid = index_uid.unwrap();
|
||||
// fixme: handle error more gracefully? not sure when this could happen
|
||||
self.index_mapper.resize_index(&wtxn, &index_uid)?;
|
||||
wtxn.abort();
|
||||
|
||||
tracing::info!("The max database size was reached. Resizing the index.");
|
||||
|
||||
return Ok(TickOutcome::TickAgain(0));
|
||||
}
|
||||
// In case of a failure we must get back and patch all the tasks with the error.
|
||||
Err(err) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed);
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
|
||||
progress.update_progress(task_progress_obj);
|
||||
|
||||
if matches!(err, Error::DatabaseUpgrade(_)) {
|
||||
tracing::error!(
|
||||
"Upgrade task failed, tasks won't be processed until the following issue is fixed: {err}"
|
||||
);
|
||||
stop_scheduler_forever = true;
|
||||
}
|
||||
let error: ResponseError = err.into();
|
||||
for id in ids.iter() {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
let mut task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&wtxn, id)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(error.clone());
|
||||
task.details = task.details.map(|d| d.to_failed());
|
||||
processing_batch.update(&mut task);
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(
|
||||
crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchFailure,
|
||||
)?;
|
||||
|
||||
tracing::error!("Batch failed {}", error);
|
||||
|
||||
self.queue
|
||||
.tasks
|
||||
.update_task(&mut wtxn, &task)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We must re-add the canceled task so they're part of the same batch.
|
||||
ids |= canceled;
|
||||
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?;
|
||||
|
||||
wtxn.commit().map_err(Error::HeedTransaction)?;
|
||||
|
||||
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
|
||||
// and then become « not found » for some time until the commit everything is written and the final commit is made.
|
||||
self.processing_tasks.write().unwrap().stop_processing();
|
||||
|
||||
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
|
||||
tracing::debug!("Deleting the update files");
|
||||
|
||||
//We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap
|
||||
let idx = AtomicU32::new(0);
|
||||
(0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> {
|
||||
let rtxn = self.read_txn()?;
|
||||
while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) {
|
||||
let task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&rtxn, id)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
|
||||
tracing::error!(
|
||||
"Failure to delete the content files associated with task {}. Error: {e}",
|
||||
task.uid
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// We shouldn't crash the tick function if we can't send data to the webhook.
|
||||
let _ = self.notify_webhook(&ids);
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);
|
||||
|
||||
if stop_scheduler_forever {
|
||||
Ok(TickOutcome::StopProcessingForever)
|
||||
} else {
|
||||
Ok(TickOutcome::TickAgain(processed_tasks))
|
||||
}
|
||||
}
|
||||
}
|
||||
603
crates/index-scheduler/src/scheduler/process_batch.rs
Normal file
603
crates/index-scheduler/src/scheduler/process_batch.rs
Normal file
@@ -0,0 +1,603 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::{self};
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task};
|
||||
use milli::update::Settings as MilliSettings;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::create_batch::Batch;
|
||||
use crate::processing::{
|
||||
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress,
|
||||
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
|
||||
UpdateIndexProgress,
|
||||
};
|
||||
use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch};
|
||||
use crate::{Error, IndexScheduler, Result, TaskId};
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Apply the operation associated with the given batch.
|
||||
///
|
||||
/// ## Return
|
||||
/// The list of tasks that were processed. The metadata of each task in the returned
|
||||
/// list is updated accordingly, with the exception of the its date fields
|
||||
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
|
||||
#[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))]
|
||||
pub(crate) fn process_batch(
|
||||
&self,
|
||||
batch: Batch,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
progress: Progress,
|
||||
) -> Result<Vec<Task>> {
|
||||
#[cfg(test)]
|
||||
{
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::PanicInsideProcessBatch)?;
|
||||
self.breakpoint(crate::test_utils::Breakpoint::InsideProcessBatch);
|
||||
}
|
||||
|
||||
match batch {
|
||||
Batch::TaskCancelation { mut task } => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
let matched_tasks =
|
||||
if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind {
|
||||
tasks
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let mut canceled_tasks = self.cancel_matched_tasks(
|
||||
&rtxn,
|
||||
task.uid,
|
||||
current_batch,
|
||||
matched_tasks,
|
||||
&progress,
|
||||
)?;
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
match &mut task.details {
|
||||
Some(Details::TaskCancelation {
|
||||
matched_tasks: _,
|
||||
canceled_tasks: canceled_tasks_details,
|
||||
original_filter: _,
|
||||
}) => {
|
||||
*canceled_tasks_details = Some(canceled_tasks.len() as u64);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
canceled_tasks.push(task);
|
||||
|
||||
Ok(canceled_tasks)
|
||||
}
|
||||
Batch::TaskDeletions(mut tasks) => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
let mut matched_tasks = RoaringBitmap::new();
|
||||
|
||||
for task in tasks.iter() {
|
||||
if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
|
||||
matched_tasks |= tasks;
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let mut deleted_tasks =
|
||||
self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Succeeded;
|
||||
let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
|
||||
deleted_tasks -= tasks;
|
||||
|
||||
match &mut task.details {
|
||||
Some(Details::TaskDeletion {
|
||||
matched_tasks: _,
|
||||
deleted_tasks,
|
||||
original_filter: _,
|
||||
}) => {
|
||||
*deleted_tasks = Some(deleted_tasks_count);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Ok(tasks)
|
||||
}
|
||||
Batch::SnapshotCreation(tasks) => self.process_snapshot(progress, tasks),
|
||||
Batch::Dump(task) => self.process_dump_creation(progress, task),
|
||||
Batch::IndexOperation { op, must_create_index } => {
|
||||
let index_uid = op.index_uid().to_string();
|
||||
let index = if must_create_index {
|
||||
// create the index if it doesn't already exist
|
||||
let wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.create_index(wtxn, &index_uid, None)?
|
||||
} else {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, &index_uid)?
|
||||
};
|
||||
|
||||
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
|
||||
self.index_mapper
|
||||
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
|
||||
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
|
||||
let _entered = span.enter();
|
||||
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
|
||||
// if the update processed successfully, we're going to store the new
|
||||
// stats of the index. Since the tasks have already been processed and
|
||||
// this is a non-critical operation. If it fails, we should not fail
|
||||
// the entire batch.
|
||||
let res = || -> Result<()> {
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}();
|
||||
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
),
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
Batch::IndexCreation { index_uid, primary_key, task } => {
|
||||
progress.update_progress(CreateIndexProgress::CreatingTheIndex);
|
||||
|
||||
let wtxn = self.env.write_txn()?;
|
||||
if self.index_mapper.exists(&wtxn, &index_uid)? {
|
||||
return Err(Error::IndexAlreadyExists(index_uid));
|
||||
}
|
||||
self.index_mapper.create_index(wtxn, &index_uid, None)?;
|
||||
|
||||
self.process_batch(
|
||||
Batch::IndexUpdate { index_uid, primary_key, task },
|
||||
current_batch,
|
||||
progress,
|
||||
)
|
||||
}
|
||||
Batch::IndexUpdate { index_uid, primary_key, mut task } => {
|
||||
progress.update_progress(UpdateIndexProgress::UpdatingTheIndex);
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let index = self.index_mapper.index(&rtxn, &index_uid)?;
|
||||
|
||||
if let Some(primary_key) = primary_key.clone() {
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let mut builder = MilliSettings::new(
|
||||
&mut index_wtxn,
|
||||
&index,
|
||||
self.index_mapper.indexer_config(),
|
||||
);
|
||||
builder.set_primary_key(primary_key);
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
|
||||
// drop rtxn before starting a new wtxn on the same db
|
||||
rtxn.commit()?;
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::IndexInfo { primary_key });
|
||||
|
||||
// if the update processed successfully, we're going to store the new
|
||||
// stats of the index. Since the tasks have already been processed and
|
||||
// this is a non-critical operation. If it fails, we should not fail
|
||||
// the entire batch.
|
||||
let res = || -> Result<()> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}();
|
||||
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
),
|
||||
}
|
||||
|
||||
Ok(vec![task])
|
||||
}
|
||||
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
|
||||
progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
|
||||
let wtxn = self.env.write_txn()?;
|
||||
|
||||
// it's possible that the index doesn't exist
|
||||
let number_of_documents = || -> Result<u64> {
|
||||
let index = self.index_mapper.index(&wtxn, &index_uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
index
|
||||
.number_of_documents(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))
|
||||
}()
|
||||
.unwrap_or_default();
|
||||
|
||||
// The write transaction is directly owned and committed inside.
|
||||
match self.index_mapper.delete_index(wtxn, &index_uid) {
|
||||
Ok(()) => (),
|
||||
Err(Error::IndexNotFound(_)) if index_has_been_created => (),
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
|
||||
// We set all the tasks details to the default value.
|
||||
for task in &mut tasks {
|
||||
task.status = Status::Succeeded;
|
||||
task.details = match &task.kind {
|
||||
KindWithContent::IndexDeletion { .. } => {
|
||||
Some(Details::ClearAll { deleted_documents: Some(number_of_documents) })
|
||||
}
|
||||
otherwise => otherwise.default_finished_details(),
|
||||
};
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
Batch::IndexSwap { mut task } => {
|
||||
progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
|
||||
swaps
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
let mut not_found_indexes = BTreeSet::new();
|
||||
for IndexSwap { indexes: (lhs, rhs) } in swaps {
|
||||
for index in [lhs, rhs] {
|
||||
let index_exists = self.index_mapper.index_exists(&wtxn, index)?;
|
||||
if !index_exists {
|
||||
not_found_indexes.insert(index);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !not_found_indexes.is_empty() {
|
||||
if not_found_indexes.len() == 1 {
|
||||
return Err(Error::SwapIndexNotFound(
|
||||
not_found_indexes.into_iter().next().unwrap().clone(),
|
||||
));
|
||||
} else {
|
||||
return Err(Error::SwapIndexesNotFound(
|
||||
not_found_indexes.into_iter().cloned().collect(),
|
||||
));
|
||||
}
|
||||
}
|
||||
progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
|
||||
for (step, swap) in swaps.iter().enumerate() {
|
||||
progress.update_progress(VariableNameStep::<SwappingTheIndexes>::new(
|
||||
format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
|
||||
step as u32,
|
||||
swaps.len() as u32,
|
||||
));
|
||||
self.apply_index_swap(
|
||||
&mut wtxn,
|
||||
&progress,
|
||||
task.uid,
|
||||
&swap.indexes.0,
|
||||
&swap.indexes.1,
|
||||
)?;
|
||||
}
|
||||
wtxn.commit()?;
|
||||
task.status = Status::Succeeded;
|
||||
Ok(vec![task])
|
||||
}
|
||||
Batch::UpgradeDatabase { mut tasks } => {
|
||||
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
|
||||
unreachable!();
|
||||
};
|
||||
let ret = catch_unwind(AssertUnwindSafe(|| self.process_upgrade(from, progress)));
|
||||
match ret {
|
||||
Ok(Ok(())) => (),
|
||||
Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))),
|
||||
Err(_e) => {
|
||||
return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked)));
|
||||
}
|
||||
}
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Succeeded;
|
||||
// Since this task can be retried we must reset its error status
|
||||
task.error = None;
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Swap the index `lhs` with the index `rhs`.
|
||||
fn apply_index_swap(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
progress: &Progress,
|
||||
task_id: u32,
|
||||
lhs: &str,
|
||||
rhs: &str,
|
||||
) -> Result<()> {
|
||||
progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks);
|
||||
// 1. Verify that both lhs and rhs are existing indexes
|
||||
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
|
||||
if !index_lhs_exists {
|
||||
return Err(Error::IndexNotFound(lhs.to_owned()));
|
||||
}
|
||||
let index_rhs_exists = self.index_mapper.index_exists(wtxn, rhs)?;
|
||||
if !index_rhs_exists {
|
||||
return Err(Error::IndexNotFound(rhs.to_owned()));
|
||||
}
|
||||
|
||||
// 2. Get the task set for index = name that appeared before the index swap task
|
||||
let mut index_lhs_task_ids = self.queue.tasks.index_tasks(wtxn, lhs)?;
|
||||
index_lhs_task_ids.remove_range(task_id..);
|
||||
let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
|
||||
index_rhs_task_ids.remove_range(task_id..);
|
||||
|
||||
// 3. before_name -> new_name in the task's KindWithContent
|
||||
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
|
||||
let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
|
||||
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
|
||||
progress.update_progress(task_progress);
|
||||
|
||||
for task_id in tasks_to_update {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
swap_index_uid_in_task(&mut task, (lhs, rhs));
|
||||
self.queue.tasks.all_tasks.put(wtxn, &task_id, &task)?;
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 4. remove the task from indexuid = before_name
|
||||
// 5. add the task to indexuid = after_name
|
||||
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata);
|
||||
self.queue.tasks.update_index(wtxn, lhs, |lhs_tasks| {
|
||||
*lhs_tasks -= &index_lhs_task_ids;
|
||||
*lhs_tasks |= &index_rhs_task_ids;
|
||||
})?;
|
||||
self.queue.tasks.update_index(wtxn, rhs, |rhs_tasks| {
|
||||
*rhs_tasks -= &index_rhs_task_ids;
|
||||
*rhs_tasks |= &index_lhs_task_ids;
|
||||
})?;
|
||||
|
||||
// 6. Swap in the index mapper
|
||||
self.index_mapper.swap(wtxn, lhs, rhs)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete each given task from all the databases (if it is deleteable).
|
||||
///
|
||||
/// Return the number of tasks that were actually deleted.
|
||||
fn delete_matched_tasks(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
matched_tasks: &RoaringBitmap,
|
||||
progress: &Progress,
|
||||
) -> Result<RoaringBitmap> {
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime);
|
||||
|
||||
// 1. Remove from this list the tasks that we are not allowed to delete
|
||||
let enqueued_tasks = self.queue.tasks.get_status(wtxn, Status::Enqueued)?;
|
||||
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
|
||||
|
||||
let all_task_ids = self.queue.tasks.all_task_ids(wtxn)?;
|
||||
let mut to_delete_tasks = all_task_ids & matched_tasks;
|
||||
to_delete_tasks -= &**processing_tasks;
|
||||
to_delete_tasks -= &enqueued_tasks;
|
||||
|
||||
// 2. We now have a list of tasks to delete, delete them
|
||||
|
||||
let mut affected_indexes = HashSet::new();
|
||||
let mut affected_statuses = HashSet::new();
|
||||
let mut affected_kinds = HashSet::new();
|
||||
let mut affected_canceled_by = RoaringBitmap::new();
|
||||
// The tasks that have been removed *per batches*.
|
||||
let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();
|
||||
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
|
||||
progress.update_progress(task_progress);
|
||||
for task_id in to_delete_tasks.iter() {
|
||||
let task =
|
||||
self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned()));
|
||||
affected_statuses.insert(task.status);
|
||||
affected_kinds.insert(task.kind.as_kind());
|
||||
// Note: don't delete the persisted task data since
|
||||
// we can only delete succeeded, failed, and canceled tasks.
|
||||
// In each of those cases, the persisted data is supposed to
|
||||
// have been deleted already.
|
||||
utils::remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.tasks.enqueued_at,
|
||||
task.enqueued_at,
|
||||
task.uid,
|
||||
)?;
|
||||
if let Some(started_at) = task.started_at {
|
||||
utils::remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.tasks.started_at,
|
||||
started_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
utils::remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.tasks.finished_at,
|
||||
finished_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(canceled_by) = task.canceled_by {
|
||||
affected_canceled_by.insert(canceled_by);
|
||||
}
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
affected_batches.entry(batch_uid).or_default().insert(task_id);
|
||||
}
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(
|
||||
(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
|
||||
);
|
||||
progress.update_progress(task_progress);
|
||||
for index in affected_indexes.iter() {
|
||||
self.queue.tasks.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
for status in affected_statuses.iter() {
|
||||
self.queue.tasks.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
for kind in affected_kinds.iter() {
|
||||
self.queue.tasks.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasks);
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
|
||||
progress.update_progress(task_progress);
|
||||
for task in to_delete_tasks.iter() {
|
||||
self.queue.tasks.all_tasks.delete(wtxn, &task)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
for canceled_by in affected_canceled_by {
|
||||
if let Some(mut tasks) = self.queue.tasks.canceled_by.get(wtxn, &canceled_by)? {
|
||||
tasks -= &to_delete_tasks;
|
||||
if tasks.is_empty() {
|
||||
self.queue.tasks.canceled_by.delete(wtxn, &canceled_by)?;
|
||||
} else {
|
||||
self.queue.tasks.canceled_by.put(wtxn, &canceled_by, &tasks)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
progress.update_progress(TaskDeletionProgress::DeletingBatches);
|
||||
let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
|
||||
progress.update_progress(batch_progress);
|
||||
for (batch_id, to_delete_tasks) in affected_batches {
|
||||
if let Some(mut tasks) = self.queue.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
|
||||
tasks -= &to_delete_tasks;
|
||||
// We must remove the batch entirely
|
||||
if tasks.is_empty() {
|
||||
self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
|
||||
self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
|
||||
}
|
||||
// Anyway, we must remove the batch from all its reverse indexes.
|
||||
// The only way to do that is to check
|
||||
|
||||
for index in affected_indexes.iter() {
|
||||
let index_tasks = self.queue.tasks.index_tasks(wtxn, index)?;
|
||||
let remaining_index_tasks = index_tasks & &tasks;
|
||||
if remaining_index_tasks.is_empty() {
|
||||
self.queue.batches.update_index(wtxn, index, |bitmap| {
|
||||
bitmap.remove(batch_id);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
for status in affected_statuses.iter() {
|
||||
let status_tasks = self.queue.tasks.get_status(wtxn, *status)?;
|
||||
let remaining_status_tasks = status_tasks & &tasks;
|
||||
if remaining_status_tasks.is_empty() {
|
||||
self.queue.batches.update_status(wtxn, *status, |bitmap| {
|
||||
bitmap.remove(batch_id);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
for kind in affected_kinds.iter() {
|
||||
let kind_tasks = self.queue.tasks.get_kind(wtxn, *kind)?;
|
||||
let remaining_kind_tasks = kind_tasks & &tasks;
|
||||
if remaining_kind_tasks.is_empty() {
|
||||
self.queue.batches.update_kind(wtxn, *kind, |bitmap| {
|
||||
bitmap.remove(batch_id);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
}
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
Ok(to_delete_tasks)
|
||||
}
|
||||
|
||||
/// Cancel each given task from all the databases (if it is cancelable).
|
||||
///
|
||||
/// Returns the list of tasks that matched the filter and must be written in the database.
|
||||
fn cancel_matched_tasks(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
cancel_task_id: TaskId,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
matched_tasks: &RoaringBitmap,
|
||||
progress: &Progress,
|
||||
) -> Result<Vec<Task>> {
|
||||
progress.update_progress(TaskCancelationProgress::RetrievingTasks);
|
||||
|
||||
// 1. Remove from this list the tasks that we are not allowed to cancel
|
||||
// Notice that only the _enqueued_ ones are cancelable and we should
|
||||
// have already aborted the indexation of the _processing_ ones
|
||||
let cancelable_tasks = self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
|
||||
let tasks_to_cancel = cancelable_tasks & matched_tasks;
|
||||
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
progress.update_progress(progress_obj);
|
||||
|
||||
// 2. We now have a list of tasks to cancel, cancel them
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(
|
||||
rtxn,
|
||||
tasks_to_cancel.iter().inspect(|_| {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}),
|
||||
)?;
|
||||
|
||||
progress.update_progress(TaskCancelationProgress::UpdatingTasks);
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
progress.update_progress(progress_obj);
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Canceled;
|
||||
task.canceled_by = Some(cancel_task_id);
|
||||
task.details = task.details.as_ref().map(|d| d.to_failed());
|
||||
current_batch.processing(Some(task));
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
}
|
||||
240
crates/index-scheduler/src/scheduler/process_dump_creation.rs
Normal file
240
crates/index-scheduler/src/scheduler/process_dump_creation.rs
Normal file
@@ -0,0 +1,240 @@
|
||||
use std::fs::File;
|
||||
use std::io::BufWriter;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use dump::IndexMetadata;
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use meilisearch_types::milli::{self};
|
||||
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
||||
use time::macros::format_description;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::processing::{AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress};
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(super) fn process_dump_creation(
|
||||
&self,
|
||||
progress: Progress,
|
||||
mut task: Task,
|
||||
) -> Result<Vec<Task>> {
|
||||
progress.update_progress(DumpCreationProgress::StartTheDumpCreation);
|
||||
let started_at = OffsetDateTime::now_utc();
|
||||
let (keys, instance_uid) =
|
||||
if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
|
||||
(keys, instance_uid)
|
||||
} else {
|
||||
unreachable!();
|
||||
};
|
||||
let dump = dump::DumpWriter::new(*instance_uid)?;
|
||||
|
||||
// 1. dump the keys
|
||||
progress.update_progress(DumpCreationProgress::DumpTheApiKeys);
|
||||
let mut dump_keys = dump.create_keys()?;
|
||||
for key in keys {
|
||||
dump_keys.push_key(key)?;
|
||||
}
|
||||
dump_keys.flush()?;
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
// 2. dump the tasks
|
||||
progress.update_progress(DumpCreationProgress::DumpTheTasks);
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
|
||||
let (atomic, update_task_progress) =
|
||||
AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u32);
|
||||
progress.update_progress(update_task_progress);
|
||||
|
||||
for ret in self.queue.tasks.all_tasks.iter(&rtxn)? {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (_, mut t) = ret?;
|
||||
let status = t.status;
|
||||
let content_file = t.content_uuid();
|
||||
|
||||
// In the case we're dumping ourselves we want to be marked as finished
|
||||
// to not loop over ourselves indefinitely.
|
||||
if t.uid == task.uid {
|
||||
let finished_at = OffsetDateTime::now_utc();
|
||||
|
||||
// We're going to fake the date because we don't know if everything is going to go well.
|
||||
// But we need to dump the task as finished and successful.
|
||||
// If something fail everything will be set appropriately in the end.
|
||||
t.status = Status::Succeeded;
|
||||
t.started_at = Some(started_at);
|
||||
t.finished_at = Some(finished_at);
|
||||
}
|
||||
|
||||
// Patch the task to remove the batch uid, because as of v1.12.5 batches are not persisted.
|
||||
// This prevent from referencing *future* batches not actually associated with the task.
|
||||
//
|
||||
// See <https://github.com/meilisearch/meilisearch/issues/5247> for details.
|
||||
t.batch_uid = None;
|
||||
|
||||
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
|
||||
|
||||
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file) = content_file {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
if status == Status::Enqueued {
|
||||
let content_file = self.queue.file_store.get_update(content_file)?;
|
||||
|
||||
for document in
|
||||
serde_json::de::Deserializer::from_reader(content_file).into_iter()
|
||||
{
|
||||
let document = document.map_err(|e| {
|
||||
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
|
||||
})?;
|
||||
dump_content_file.push_document(&document)?;
|
||||
}
|
||||
|
||||
dump_content_file.flush()?;
|
||||
}
|
||||
}
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
dump_tasks.flush()?;
|
||||
|
||||
// 3. Dump the indexes
|
||||
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
|
||||
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
|
||||
let mut count = 0;
|
||||
let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
|
||||
progress.update_progress(VariableNameStep::<DumpCreationProgress>::new(
|
||||
uid.to_string(),
|
||||
count,
|
||||
nb_indexes,
|
||||
));
|
||||
count += 1;
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let metadata = IndexMetadata {
|
||||
uid: uid.to_owned(),
|
||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||
created_at: index
|
||||
.created_at(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
|
||||
updated_at: index
|
||||
.updated_at(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
|
||||
};
|
||||
let mut index_dumper = dump.create_index(uid, &metadata)?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index
|
||||
.embedding_configs(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let nb_documents = index
|
||||
.number_of_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
|
||||
as u32;
|
||||
let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
|
||||
progress.update_progress(update_document_progress);
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// 3.1. Dump the documents
|
||||
for ret in documents {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
'inject_vectors: {
|
||||
let embeddings = index
|
||||
.embeddings(&rtxn, id)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
let user_err =
|
||||
milli::Error::UserError(milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
.external_id_of(&rtxn, std::iter::once(id))
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={id}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
});
|
||||
|
||||
return Err(Error::from_milli(user_err, Some(uid.to_string())));
|
||||
};
|
||||
|
||||
for (embedder_name, embeddings) in embeddings {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == embedder_name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
)),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
index_dumper.push_document(&document)?;
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 3.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(
|
||||
index,
|
||||
&rtxn,
|
||||
meilisearch_types::settings::SecretPolicy::RevealSecrets,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
index_dumper.settings(&settings)?;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// 4. Dump experimental feature settings
|
||||
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
|
||||
let features = self.features().runtime_features();
|
||||
dump.create_experimental_features(features)?;
|
||||
|
||||
let dump_uid = started_at.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
)).unwrap();
|
||||
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
progress.update_progress(DumpCreationProgress::CompressTheDump);
|
||||
let path = self.scheduler.dumps_path.join(format!("{}.dump", dump_uid));
|
||||
let file = File::create(path)?;
|
||||
dump.persist_to(BufWriter::new(file))?;
|
||||
|
||||
// if we reached this step we can tell the scheduler we succeeded to dump ourselves.
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::Dump { dump_uid: Some(dump_uid) });
|
||||
Ok(vec![task])
|
||||
}
|
||||
}
|
||||
531
crates/index-scheduler/src/scheduler/process_index_operation.rs
Normal file
531
crates/index-scheduler/src/scheduler/process_index_operation.rs
Normal file
@@ -0,0 +1,531 @@
|
||||
use bumpalo::collections::CollectIn;
|
||||
use bumpalo::Bump;
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::milli::documents::PrimaryKey;
|
||||
use meilisearch_types::milli::progress::Progress;
|
||||
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
|
||||
use meilisearch_types::milli::update::DocumentAdditionResult;
|
||||
use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
|
||||
use meilisearch_types::settings::apply_settings_to_builder;
|
||||
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
||||
use meilisearch_types::Index;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::create_batch::{DocumentOperation, IndexOperation};
|
||||
use crate::processing::{
|
||||
DocumentDeletionProgress, DocumentEditionProgress, DocumentOperationProgress, SettingsProgress,
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Process the index operation on the given index.
|
||||
///
|
||||
/// ## Return
|
||||
/// The list of processed tasks.
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip(self, index_wtxn, index, progress),
|
||||
target = "indexing::scheduler"
|
||||
)]
|
||||
pub(crate) fn apply_index_operation<'i>(
|
||||
&self,
|
||||
index_wtxn: &mut RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
operation: IndexOperation,
|
||||
progress: Progress,
|
||||
) -> Result<Vec<Task>> {
|
||||
let indexer_alloc = Bump::new();
|
||||
|
||||
let started_processing_at = std::time::Instant::now();
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
|
||||
match operation {
|
||||
IndexOperation::DocumentClear { index_uid, mut tasks } => {
|
||||
let count = milli::update::ClearDocuments::new(index_wtxn, index)
|
||||
.execute()
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid)))?;
|
||||
|
||||
let mut first_clear_found = false;
|
||||
for task in &mut tasks {
|
||||
task.status = Status::Succeeded;
|
||||
// The first document clear will effectively delete every documents
|
||||
// in the database but the next ones will clear 0 documents.
|
||||
task.details = match &task.kind {
|
||||
KindWithContent::DocumentClear { .. } => {
|
||||
let count = if first_clear_found { 0 } else { count };
|
||||
first_clear_found = true;
|
||||
Some(Details::ClearAll { deleted_documents: Some(count) })
|
||||
}
|
||||
otherwise => otherwise.default_details(),
|
||||
};
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
|
||||
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
|
||||
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
|
||||
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
|
||||
// to a fresh thread.
|
||||
let mut content_files = Vec::new();
|
||||
for operation in &operations {
|
||||
match operation {
|
||||
DocumentOperation::Replace(content_uuid)
|
||||
| DocumentOperation::Update(content_uuid) => {
|
||||
let content_file = self.queue.file_store.get_update(*content_uuid)?;
|
||||
let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
|
||||
content_files.push(mmap);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut content_files_iter = content_files.iter();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let embedders = index
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
for operation in operations {
|
||||
match operation {
|
||||
DocumentOperation::Replace(_content_uuid) => {
|
||||
let mmap = content_files_iter.next().unwrap();
|
||||
indexer
|
||||
.replace_documents(mmap)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
}
|
||||
DocumentOperation::Update(_content_uuid) => {
|
||||
let mmap = content_files_iter.next().unwrap();
|
||||
indexer
|
||||
.update_documents(mmap)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
}
|
||||
DocumentOperation::Delete(document_ids) => {
|
||||
let document_ids: bumpalo::collections::vec::Vec<_> = document_ids
|
||||
.iter()
|
||||
.map(|s| &*indexer_alloc.alloc_str(s))
|
||||
.collect_in(&indexer_alloc);
|
||||
indexer.delete_documents(document_ids.into_bump_slice());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
|
||||
progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
|
||||
let (document_changes, operation_stats, primary_key) = indexer
|
||||
.into_changes(
|
||||
&indexer_alloc,
|
||||
index,
|
||||
&rtxn,
|
||||
primary_key.as_deref(),
|
||||
&mut new_fields_ids_map,
|
||||
&|| must_stop_processing.get(),
|
||||
progress.clone(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
|
||||
let mut candidates_count = 0;
|
||||
for (stats, task) in operation_stats.into_iter().zip(&mut tasks) {
|
||||
candidates_count += stats.document_count;
|
||||
match stats.error {
|
||||
Some(error) => {
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(milli::Error::UserError(error).into());
|
||||
}
|
||||
None => task.status = Status::Succeeded,
|
||||
}
|
||||
|
||||
task.details = match task.details {
|
||||
Some(Details::DocumentAdditionOrUpdate { received_documents, .. }) => {
|
||||
Some(Details::DocumentAdditionOrUpdate {
|
||||
received_documents,
|
||||
indexed_documents: Some(stats.document_count),
|
||||
})
|
||||
}
|
||||
Some(Details::DocumentDeletion { provided_ids, .. }) => {
|
||||
Some(Details::DocumentDeletion {
|
||||
provided_ids,
|
||||
deleted_documents: Some(stats.document_count),
|
||||
})
|
||||
}
|
||||
_ => {
|
||||
// In the case of a `documentAdditionOrUpdate` or `DocumentDeletion`
|
||||
// the details MUST be set to either addition or deletion
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
progress.update_progress(DocumentOperationProgress::Indexing);
|
||||
if tasks.iter().any(|res| res.error.is_none()) {
|
||||
indexer::index(
|
||||
index_wtxn,
|
||||
index,
|
||||
pool,
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
&progress,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
|
||||
let addition = DocumentAdditionResult {
|
||||
indexed_documents: candidates_count,
|
||||
number_of_documents: index
|
||||
.number_of_documents(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
};
|
||||
|
||||
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
IndexOperation::DocumentEdition { index_uid, mut task } => {
|
||||
progress.update_progress(DocumentEditionProgress::RetrievingConfig);
|
||||
|
||||
let (filter, code) = if let KindWithContent::DocumentEdition {
|
||||
filter_expr,
|
||||
context: _,
|
||||
function,
|
||||
..
|
||||
} = &task.kind
|
||||
{
|
||||
(filter_expr, function)
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let candidates = match filter.as_ref().map(Filter::from_json) {
|
||||
Some(Ok(Some(filter))) => filter
|
||||
.evaluate(index_wtxn, index)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
None | Some(Ok(None)) => index.documents_ids(index_wtxn)?,
|
||||
Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))),
|
||||
};
|
||||
|
||||
let (original_filter, context, function) = if let Some(Details::DocumentEdition {
|
||||
original_filter,
|
||||
context,
|
||||
function,
|
||||
..
|
||||
}) = task.details
|
||||
{
|
||||
(original_filter, context, function)
|
||||
} else {
|
||||
// In the case of a `documentEdition` the details MUST be set
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
if candidates.is_empty() {
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::DocumentEdition {
|
||||
original_filter,
|
||||
context,
|
||||
function,
|
||||
deleted_documents: Some(0),
|
||||
edited_documents: Some(0),
|
||||
});
|
||||
|
||||
return Ok(vec![task]);
|
||||
}
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
// candidates not empty => index not empty => a primary key is set
|
||||
let primary_key = index.primary_key(&rtxn)?.unwrap();
|
||||
|
||||
let primary_key =
|
||||
PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
|
||||
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
|
||||
|
||||
let result_count = Ok((candidates.len(), candidates.len())) as Result<_>;
|
||||
|
||||
if task.error.is_none() {
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
|
||||
let candidates_count = candidates.len();
|
||||
progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
|
||||
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
|
||||
let document_changes = pool
|
||||
.install(|| {
|
||||
indexer
|
||||
.into_changes(&primary_key)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))
|
||||
})
|
||||
.unwrap()?;
|
||||
let embedders = index
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
|
||||
progress.update_progress(DocumentEditionProgress::Indexing);
|
||||
indexer::index(
|
||||
index_wtxn,
|
||||
index,
|
||||
pool,
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
None, // cannot change primary key in DocumentEdition
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
&progress,
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
let addition = DocumentAdditionResult {
|
||||
indexed_documents: candidates_count,
|
||||
number_of_documents: index
|
||||
.number_of_documents(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
};
|
||||
|
||||
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||
}
|
||||
|
||||
match result_count {
|
||||
Ok((deleted_documents, edited_documents)) => {
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::DocumentEdition {
|
||||
original_filter,
|
||||
context,
|
||||
function,
|
||||
deleted_documents: Some(deleted_documents),
|
||||
edited_documents: Some(edited_documents),
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
task.status = Status::Failed;
|
||||
task.details = Some(Details::DocumentEdition {
|
||||
original_filter,
|
||||
context,
|
||||
function,
|
||||
deleted_documents: Some(0),
|
||||
edited_documents: Some(0),
|
||||
});
|
||||
task.error = Some(e.into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(vec![task])
|
||||
}
|
||||
IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
|
||||
progress.update_progress(DocumentDeletionProgress::RetrievingConfig);
|
||||
|
||||
let mut to_delete = RoaringBitmap::new();
|
||||
let external_documents_ids = index.external_documents_ids();
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
let before = to_delete.len();
|
||||
task.status = Status::Succeeded;
|
||||
|
||||
match &task.kind {
|
||||
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
|
||||
for id in documents_ids {
|
||||
if let Some(id) = external_documents_ids.get(index_wtxn, id)? {
|
||||
to_delete.insert(id);
|
||||
}
|
||||
}
|
||||
let will_be_removed = to_delete.len() - before;
|
||||
task.details = Some(Details::DocumentDeletion {
|
||||
provided_ids: documents_ids.len(),
|
||||
deleted_documents: Some(will_be_removed),
|
||||
});
|
||||
}
|
||||
KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => {
|
||||
let before = to_delete.len();
|
||||
let filter = match Filter::from_json(filter_expr) {
|
||||
Ok(filter) => filter,
|
||||
Err(err) => {
|
||||
// theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(
|
||||
Error::from_milli(err, Some(index_uid.clone())).into(),
|
||||
);
|
||||
None
|
||||
}
|
||||
};
|
||||
if let Some(filter) = filter {
|
||||
let candidates = filter
|
||||
.evaluate(index_wtxn, index)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())));
|
||||
match candidates {
|
||||
Ok(candidates) => to_delete |= candidates,
|
||||
Err(err) => {
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(err.into());
|
||||
}
|
||||
};
|
||||
}
|
||||
let will_be_removed = to_delete.len() - before;
|
||||
if let Some(Details::DocumentDeletionByFilter {
|
||||
original_filter: _,
|
||||
deleted_documents,
|
||||
}) = &mut task.details
|
||||
{
|
||||
*deleted_documents = Some(will_be_removed);
|
||||
} else {
|
||||
// In the case of a `documentDeleteByFilter` the details MUST be set
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
if to_delete.is_empty() {
|
||||
return Ok(tasks);
|
||||
}
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
// to_delete not empty => index not empty => primary key set
|
||||
let primary_key = index.primary_key(&rtxn)?.unwrap();
|
||||
|
||||
let primary_key =
|
||||
PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
|
||||
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
|
||||
|
||||
if !tasks.iter().all(|res| res.error.is_some()) {
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
|
||||
progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
|
||||
let mut indexer = indexer::DocumentDeletion::new();
|
||||
let candidates_count = to_delete.len();
|
||||
indexer.delete_documents_by_docids(to_delete);
|
||||
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
|
||||
let embedders = index
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
|
||||
progress.update_progress(DocumentDeletionProgress::Indexing);
|
||||
indexer::index(
|
||||
index_wtxn,
|
||||
index,
|
||||
pool,
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
None, // document deletion never changes primary key
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
&progress,
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
let addition = DocumentAdditionResult {
|
||||
indexed_documents: candidates_count,
|
||||
number_of_documents: index
|
||||
.number_of_documents(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
};
|
||||
|
||||
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
IndexOperation::Settings { index_uid, settings, mut tasks } => {
|
||||
progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings);
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
|
||||
|
||||
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
|
||||
let checked_settings = settings.clone().check();
|
||||
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
|
||||
apply_settings_to_builder(&checked_settings, &mut builder);
|
||||
|
||||
// We can apply the status right now and if an update fail later
|
||||
// the whole batch will be marked as failed.
|
||||
task.status = Status::Succeeded;
|
||||
}
|
||||
|
||||
progress.update_progress(SettingsProgress::ApplyTheSettings);
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
IndexOperation::DocumentClearAndSetting {
|
||||
index_uid,
|
||||
cleared_tasks,
|
||||
settings,
|
||||
settings_tasks,
|
||||
} => {
|
||||
let mut import_tasks = self.apply_index_operation(
|
||||
index_wtxn,
|
||||
index,
|
||||
IndexOperation::DocumentClear {
|
||||
index_uid: index_uid.clone(),
|
||||
tasks: cleared_tasks,
|
||||
},
|
||||
progress.clone(),
|
||||
)?;
|
||||
|
||||
let settings_tasks = self.apply_index_operation(
|
||||
index_wtxn,
|
||||
index,
|
||||
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
|
||||
progress,
|
||||
)?;
|
||||
|
||||
let mut tasks = settings_tasks;
|
||||
tasks.append(&mut import_tasks);
|
||||
Ok(tasks)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,136 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use meilisearch_types::heed::CompactionOption;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::{self};
|
||||
use meilisearch_types::tasks::{Status, Task};
|
||||
use meilisearch_types::{compression, VERSION_FILE_NAME};
|
||||
|
||||
use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(super) fn process_snapshot(
|
||||
&self,
|
||||
progress: Progress,
|
||||
mut tasks: Vec<Task>,
|
||||
) -> Result<Vec<Task>> {
|
||||
progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
|
||||
|
||||
fs::create_dir_all(&self.scheduler.snapshots_path)?;
|
||||
let temp_snapshot_dir = tempfile::tempdir()?;
|
||||
|
||||
// 1. Snapshot the version file.
|
||||
let dst = temp_snapshot_dir.path().join(VERSION_FILE_NAME);
|
||||
fs::copy(&self.scheduler.version_file_path, dst)?;
|
||||
|
||||
// 2. Snapshot the index-scheduler LMDB env
|
||||
//
|
||||
// When we call copy_to_file, LMDB opens a read transaction by itself,
|
||||
// we can't provide our own. It is an issue as we would like to know
|
||||
// the update files to copy but new ones can be enqueued between the copy
|
||||
// of the env and the new transaction we open to retrieve the enqueued tasks.
|
||||
// So we prefer opening a new transaction after copying the env and copy more
|
||||
// update files than not enough.
|
||||
//
|
||||
// Note that there cannot be any update files deleted between those
|
||||
// two read operations as the task processing is synchronous.
|
||||
|
||||
// 2.1 First copy the LMDB env of the index-scheduler
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
|
||||
let dst = temp_snapshot_dir.path().join("tasks");
|
||||
fs::create_dir_all(&dst)?;
|
||||
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
|
||||
// 2.2 Create a read transaction on the index-scheduler
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
// 2.3 Create the update files directory
|
||||
let update_files_dir = temp_snapshot_dir.path().join("update_files");
|
||||
fs::create_dir_all(&update_files_dir)?;
|
||||
|
||||
// 2.4 Only copy the update files of the enqueued tasks
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
|
||||
let enqueued = self.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
|
||||
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
|
||||
progress.update_progress(update_file_progress);
|
||||
for task_id in enqueued {
|
||||
let task =
|
||||
self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Some(content_uuid) = task.content_uuid() {
|
||||
let src = self.queue.file_store.get_update_path(content_uuid);
|
||||
let dst = update_files_dir.join(content_uuid.to_string());
|
||||
fs::copy(src, dst)?;
|
||||
}
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 3. Snapshot every indexes
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
|
||||
let index_mapping = self.index_mapper.index_mapping;
|
||||
let nb_indexes = index_mapping.len(&rtxn)? as u32;
|
||||
|
||||
for (i, result) in index_mapping.iter(&rtxn)?.enumerate() {
|
||||
let (name, uuid) = result?;
|
||||
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
|
||||
name, i as u32, nb_indexes,
|
||||
));
|
||||
let index = self.index_mapper.index(&rtxn, name)?;
|
||||
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
|
||||
fs::create_dir_all(&dst)?;
|
||||
index
|
||||
.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)
|
||||
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
|
||||
}
|
||||
|
||||
drop(rtxn);
|
||||
|
||||
// 4. Snapshot the auth LMDB env
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
|
||||
let dst = temp_snapshot_dir.path().join("auth");
|
||||
fs::create_dir_all(&dst)?;
|
||||
// TODO We can't use the open_auth_store_env function here but we should
|
||||
let auth = unsafe {
|
||||
milli::heed::EnvOpenOptions::new()
|
||||
.map_size(1024 * 1024 * 1024) // 1 GiB
|
||||
.max_dbs(2)
|
||||
.open(&self.scheduler.auth_path)
|
||||
}?;
|
||||
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
|
||||
// 5. Copy and tarball the flat snapshot
|
||||
progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
|
||||
// 5.1 Find the original name of the database
|
||||
// TODO find a better way to get this path
|
||||
let mut base_path = self.env.path().to_owned();
|
||||
base_path.pop();
|
||||
let db_name = base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms");
|
||||
|
||||
// 5.2 Tarball the content of the snapshot in a tempfile with a .snapshot extension
|
||||
let snapshot_path = self.scheduler.snapshots_path.join(format!("{}.snapshot", db_name));
|
||||
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.scheduler.snapshots_path)?;
|
||||
compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?;
|
||||
let file = temp_snapshot_file.persist(snapshot_path)?;
|
||||
|
||||
// 5.3 Change the permission to make the snapshot readonly
|
||||
let mut permissions = file.metadata()?.permissions();
|
||||
permissions.set_readonly(true);
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
#[allow(clippy::non_octal_unix_permissions)]
|
||||
// rwxrwxrwx
|
||||
permissions.set_mode(0b100100100);
|
||||
}
|
||||
|
||||
file.set_permissions(permissions)?;
|
||||
|
||||
for task in &mut tasks {
|
||||
task.status = Status::Succeeded;
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
}
|
||||
49
crates/index-scheduler/src/scheduler/process_upgrade/mod.rs
Normal file
49
crates/index-scheduler/src/scheduler/process_upgrade/mod.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(super) fn process_upgrade(
|
||||
&self,
|
||||
db_version: (u32, u32, u32),
|
||||
progress: Progress,
|
||||
) -> Result<()> {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::ProcessUpgrade)?;
|
||||
|
||||
enum UpgradeIndex {}
|
||||
let indexes = self.index_names()?;
|
||||
|
||||
for (i, uid) in indexes.iter().enumerate() {
|
||||
progress.update_progress(VariableNameStep::<UpgradeIndex>::new(
|
||||
format!("Upgrading index `{uid}`"),
|
||||
i as u32,
|
||||
indexes.len() as u32,
|
||||
));
|
||||
let index = self.index(uid)?;
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let regen_stats = milli::update::upgrade::upgrade(
|
||||
&mut index_wtxn,
|
||||
&index,
|
||||
db_version,
|
||||
progress.clone(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
if regen_stats {
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_wtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
index_wtxn.commit()?;
|
||||
|
||||
// Release wtxn as soon as possible because it stops us from registering tasks
|
||||
let mut index_schd_wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.store_stats_of(&mut index_schd_wtxn, uid, &stats)?;
|
||||
index_schd_wtxn.commit()?;
|
||||
} else {
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test.rs
|
||||
expression: task.details
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"embedders": {
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test.rs
|
||||
expression: config.embedder_options
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"Rest": {
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test.rs
|
||||
expression: task.details
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"embedders": {
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: doc
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"doggo": "Intel",
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: task.details
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"embedders": {
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: doc
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"doggo": "kefir",
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: fakerest_config.embedder_options
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"Rest": {
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: simple_hf_config.embedder_options
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"HuggingFace": {
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: task.details
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"embedders": {
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/scheduler/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user