Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-12-12 15:45:48 +00:00)

Compare commits: prototype- ... prototype- (258 commits)
Commit SHA1s:

2ad094e95d, f1c0ebab5b, 59fe64adec, 7d22a6eb3a, 9cf91f3ffe, 666b16e1d1,
5b467ed4ce, 6e98fe5f2d, 1fcd330751, d5583ba1e9, 50532ccccc, dacb711ea7,
a90d467163, c1dcb618f1, c71add854d, e484bfc514, ccc54b1d23, bf33ca0c38,
532684981d, ce2dd8e2f9, d90febdc82, f0e73333af, a682f79487, 9214a9b641,
51d57c1076, 3954af9fe8, d8880a93b7, 27bd557396, c322b307bc, 7aad304224,
61a7f68113, 8d3af3dea2, b82530e4d5, eaa249ca94, a3def29f11, dd5db5257d,
4e5a3fee5d, 22027c782a, 44e7377240, 71f359b10b, 771d1e8282, 87b2f8f7c2,
aed03f1473, 7ff517bf3a, 961a960fff, 093b358864, 71ea943386, a878875aca,
6aa93e3e93, 2be35e9c5c, bea64ecc5c, fb96e8496e, 0dd9d173c6, ff9439b5ac,
355950939a, 7c502794d5, 60669dfa35, d6cd954e4b, 7429faf046, edbe32e53e,
74fe44e18e, ccbcacec22, 43a11d2f66, 75fcbfc2fe, 8c19b6d55e, 08d0f05ece,
4762e9afa0, 12fcab91c5, 792a72a23f, 2dd7f29edf, ff680d29a8, 00420dfca0,
a3a86ac629, f6210b8e5e, fe46af7ded, 57b94b411f, a7b6f65851, 1ec6646d8c,
2dccacf273, ce0f04e9ee, 9ba5c6d371, 56673fee56, b30bcbb931, 5fbe4436c8,
8fa253c293, 4833da9edb, c0e31a4f01, c06ffb31d1, 3097314b9d, 786a978237,
03e53aaf6d, 2206f045a4, 246cf8b2d1, 82adabc5a0, c9a22247d2, c535b8ddef,
8e89619aed, f617ca8e38, 959175ad2a, 341ffbf5ef, 542f3073f4, 0f134b079f,
9e7ae47355, 1edf07df29, 88aa3cddde, e6846cb55a, 29b715e2f9, f28dc5bd2b,
56d0b8ea54, 514edb1b79, cfb609d41d, 11cb062067, 2ca4926ac5, 834bd9b879,
cac7e00983, e9300bac64, b0da7864a4, 2b9d379feb, 8d585a04d4, 0095a72fba,
651339648c, a489f4c172, 3b875ea00e, 9d269c499c, da35ae0a6e, 61945b235d,
e936ac172d, 162a84cdbf, 92c63cf351, fca35b7476, 4056657a55, 685d227597,
49b9f6ff38, 79d0a3fb97, 313ef7e79b, 256407be61, 8b3943bd32, 87b972d29a,
09ab61b360, 2459f381b4, 6442f02de4, 91c4d9ea79, 92a4091da3, 29a337f0f9,
8c3cebadaa, b566458aa2, ae4344e359, b6cb384650, 2c3e3d856c, 93e97f814c,
e9350f033d, 54c92fd6c0, 4f4df83a51, a51021cab7, e33f4fdeae, e407bca196,
cd24ea11b4, ba578e7ab5, 05a74d1e68, 41d61deb97, bba292b01a, 96923dff33,
8f9c9305da, a9f309e1d1, e456a9acd8, 9b7d29466c, b0ef14b6f0, 36febe2068,
6f14a6ec18, 1a45b19e7e, bd7525b166, 359757d939, 1c6eea596c, 693b6f483e,
818a4aa6d9, ddadeb99e9, b8d8be934a, 7175d70b8f, 8a3e65ab6f, 4737e1a2a5,
36522e951b, fce046d84d, 3fc507bb44, fdbcd033fb, aaab49baca, 0d0d6e8099,
c1e351c92b, 67cab4cc9d, f30a37b0fe, a78a9f80dd, 439fee5434, 9e858590e0,
29eebd5f93, 07da6edbdf, 22b83042e6, 52ab13906a, 29bec8efd4, 6947a8990b,
fbb2bb0c73, 15918f53a9, d7f5f3a0a3, 1afbf35f27, d7675233d5, c63c1ac32b,
6171dcde0d, 04bc134324, 8ff39d927d, ffd461c800, 9134d27980, f60242979f,
d347417cfd, 55d54afd69, dca7679c47, a34b692396, 63829b62e9, 44c8252ad5,
19ae428890, 7adcb657ae, 9624768976, 5025acfd2a, 4bbfdccc3e, a5b24b54b8,
461e69c143, 915aeafefe, 408529d8b2, 1724ab6d94, 49a500a342, f26eabcfa1,
b468c090f3, c14114840e, 7933d1f9ea, 6f1d3f337b, 9640706c5a, 01cd273a52,
ae87d1cab9, d5a5372aba, cf62af13e8, 0d5e176dc2, d6f36a773d, 91cf94c196,
753ba39199, 3944c25853, 925bce5fbd, 62065ed30d, 97e6ae1957, 5ed9be0789,
7597b1049f, d99150f21b, c9726674a0, 205f40b3b8, 361580f451, a8d55562e9,
40d649ec9e, c272ac8204, e18c677f0e, 84a288da57, cbfc325b56, ea640b076e
.github/ISSUE_TEMPLATE/new_feature_issue.md (vendored, 5 changes)

@@ -24,6 +24,11 @@ TBD
- [ ] If not, add the `no db change` label to your PR, and you're good to merge.
- [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining you what to do.

### Reminders when adding features

- [ ] Write unit tests using insta
- [ ] Write declarative integration tests in [workloads/tests](https://github.com/meilisearch/meilisearch/tree/main/workloads/test). Specify the routes to call and then call `cargo xtask test workloads/tests/YOUR_TEST.json --update-responses` so that responses are automatically filled.

### Reminders when modifying the API

- [ ] Update the openAPI file with utoipa:
.github/workflows/bench-manual.yml (vendored, 2 changes)

@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal
.github/workflows/bench-pr.yml (vendored, 4 changes)

@@ -66,9 +66,7 @@ jobs:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}

- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- uses: dtolnay/rust-toolchain@1.91.1

- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |
.github/workflows/bench-push-indexing.yml (vendored, 4 changes)

@@ -12,9 +12,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- uses: dtolnay/rust-toolchain@1.91.1

# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
.github/workflows/benchmarks-manual.yml (vendored, 2 changes)

@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal
.github/workflows/benchmarks-pr.yml (vendored, 2 changes)

@@ -44,7 +44,7 @@ jobs:
exit 1
fi

- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

@@ -16,7 +16,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal
.github/workflows/db-change-comments.yml (vendored, 6 changes)

@@ -6,7 +6,7 @@ on:

env:
MESSAGE: |
### Hello, I'm a bot 🤖
### Hello, I'm a bot 🤖

You are receiving this message because you declared that this PR make changes to the Meilisearch database.
Depending on the nature of the change, additional actions might be required on your part. The following sections detail the additional actions depending on the nature of the change, please copy the relevant section in the description of your PR, and make sure to perform the required actions.

@@ -19,6 +19,7 @@ env:

- [ ] Detail the change to the DB format and why they are forward compatible
- [ ] Forward-compatibility: A database created before this PR and using the features touched by this PR was able to be opened by a Meilisearch produced by the code of this PR.
- [ ] Declarative test: add a [declarative test containing a dumpless upgrade](https://github.com/meilisearch/meilisearch/blob/main/TESTING.md#typical-usage)

## This PR makes breaking changes

@@ -35,8 +36,7 @@ env:
- [ ] Write the code to go from the old database to the new one
- If the change happened in milli, the upgrade function should be written and called [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/milli/src/update/upgrade/mod.rs#L24-L47)
- If the change happened in the index-scheduler, we've never done it yet, but the right place to do it should be [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/index-scheduler/src/scheduler/process_upgrade/mod.rs#L13)
- [ ] Write an integration test [here](https://github.com/meilisearch/meilisearch/blob/main/crates/meilisearch/tests/upgrade/mod.rs) ensuring you can read the old database, upgrade to the new database, and read the new database as expected

- [ ] Declarative test: add a [declarative test containing a dumpless upgrade](https://github.com/meilisearch/meilisearch/blob/main/TESTING.md#typical-usage)

jobs:
add-comment:
.github/workflows/flaky-tests.yml (vendored, 10 changes)

@@ -3,7 +3,7 @@ name: Look for flaky tests
on:
workflow_dispatch:
schedule:
- cron: '0 4 * * *' # Every day at 4:00AM
- cron: "0 4 * * *" # Every day at 4:00AM

jobs:
flaky:

@@ -13,11 +13,17 @@ jobs:
image: ubuntu:22.04
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps
.github/workflows/fuzzer-indexing.yml (vendored, 4 changes)

@@ -12,9 +12,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- uses: dtolnay/rust-toolchain@1.91.1

# Run benchmarks
- name: Run the fuzzer
.github/workflows/publish-apt-brew-pkg.yml (vendored, 8 changes)

@@ -25,7 +25,13 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.89
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v5
.github/workflows/publish-docker-images.yml (vendored, 175 changes)
@@ -14,10 +14,105 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
runs-on: docker
|
||||
build:
|
||||
runs-on: ${{ matrix.runner }}
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
platform: [amd64, arm64]
|
||||
edition: [community, enterprise]
|
||||
include:
|
||||
- platform: amd64
|
||||
runner: ubuntu-24.04
|
||||
- platform: arm64
|
||||
runner: ubuntu-24.04-arm
|
||||
- edition: community
|
||||
registry: getmeili/meilisearch
|
||||
feature-flag: ""
|
||||
- edition: enterprise
|
||||
registry: getmeili/meilisearch-enterprise
|
||||
feature-flag: "--features enterprise"
|
||||
|
||||
permissions: {}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
platform=linux/${{ matrix.platform }}
|
||||
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
platforms: linux/${{ matrix.platform }}
|
||||
install: true
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ matrix.registry }}
|
||||
# Prevent `latest` to be updated for each new tag pushed.
|
||||
# We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases.
|
||||
flavor: latest=false
|
||||
tags: |
|
||||
type=ref,event=tag
|
||||
type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
|
||||
type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
|
||||
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
|
||||
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
|
||||
|
||||
- name: Build and push by digest
|
||||
uses: docker/build-push-action@v6
|
||||
id: build-and-push
|
||||
with:
|
||||
platforms: linux/${{ matrix.platform }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
tags: ${{ matrix.registry }}
|
||||
outputs: type=image,push-by-digest=true,name-canonical=true,push=true
|
||||
build-args: |
|
||||
COMMIT_SHA=${{ github.sha }}
|
||||
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
|
||||
GIT_TAG=${{ github.ref_name }}
|
||||
EXTRA_ARGS=${{ matrix.feature-flag }}
|
||||
|
||||
- name: Export digest
|
||||
run: |
|
||||
mkdir -p ${{ runner.temp }}/digests
|
||||
digest="${{ steps.build-and-push.outputs.digest }}"
|
||||
touch "${{ runner.temp }}/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: digests-${{ matrix.edition }}-${{ env.PLATFORM_PAIR }}
|
||||
path: ${{ runner.temp }}/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
merge:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
edition: [community, enterprise]
|
||||
include:
|
||||
- edition: community
|
||||
registry: getmeili/meilisearch
|
||||
- edition: enterprise
|
||||
registry: getmeili/meilisearch-enterprise
|
||||
needs:
|
||||
- build
|
||||
|
||||
permissions:
|
||||
id-token: write # This is needed to use Cosign in keyless mode
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
@@ -58,26 +153,30 @@ jobs:
|
||||
|
||||
echo "date=$commit_date" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Install cosign
|
||||
uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0
|
||||
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: ${{ runner.temp }}/digests
|
||||
pattern: digests-${{ matrix.edition }}-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: getmeili/meilisearch
|
||||
images: ${{ matrix.registry }}
|
||||
# Prevent `latest` to be updated for each new tag pushed.
|
||||
# We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases.
|
||||
flavor: latest=false
|
||||
@@ -88,33 +187,31 @@ jobs:
|
||||
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
|
||||
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
id: build-and-push
|
||||
with:
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
build-args: |
|
||||
COMMIT_SHA=${{ github.sha }}
|
||||
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
|
||||
GIT_TAG=${{ github.ref_name }}
|
||||
- name: Create manifest list and push
|
||||
working-directory: ${{ runner.temp }}/digests
|
||||
run: |
|
||||
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
|
||||
$(printf '${{ matrix.registry }}@sha256:%s ' *)
|
||||
|
||||
- name: Inspect image to fetch digest to sign
|
||||
run: |
|
||||
digest=$(docker buildx imagetools inspect --format='{{ json .Manifest }}' ${{ matrix.registry }}:${{ steps.meta.outputs.version }} | jq -r '.digest')
|
||||
echo "DIGEST=${digest}" >> $GITHUB_ENV
|
||||
|
||||
- name: Sign the images with GitHub OIDC Token
|
||||
env:
|
||||
DIGEST: ${{ steps.build-and-push.outputs.digest }}
|
||||
TAGS: ${{ steps.meta.outputs.tags }}
|
||||
run: |
|
||||
images=""
|
||||
for tag in ${TAGS}; do
|
||||
images+="${tag}@${DIGEST} "
|
||||
images+="${tag}@${{ env.DIGEST }} "
|
||||
done
|
||||
cosign sign --yes ${images}
|
||||
|
||||
# /!\ Don't touch this without checking with Cloud team
|
||||
- name: Send CI information to Cloud team
|
||||
# /!\ Don't touch this without checking with engineers working on the Cloud code base on #discussion-engineering Slack channel
|
||||
- name: Notify meilisearch-cloud
|
||||
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
|
||||
if: github.event_name == 'push'
|
||||
if: ${{ (github.event_name == 'push') && (matrix.edition == 'enterprise') }}
|
||||
uses: peter-evans/repository-dispatch@v3
|
||||
with:
|
||||
token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
@@ -122,21 +219,13 @@ jobs:
|
||||
event-type: cloud-docker-build
|
||||
client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }'
|
||||
|
||||
# Send notification to Swarmia to notify of a deployment: https://app.swarmia.com
|
||||
# - name: 'Setup jq'
|
||||
# uses: dcarbone/install-jq-action
|
||||
# - name: Send deployment to Swarmia
|
||||
# if: github.event_name == 'push' && success()
|
||||
# run: |
|
||||
# JSON_STRING=$( jq --null-input --compact-output \
|
||||
# --arg version "${{ github.ref_name }}" \
|
||||
# --arg appName "meilisearch" \
|
||||
# --arg environment "production" \
|
||||
# --arg commitSha "${{ github.sha }}" \
|
||||
# --arg repositoryFullName "${{ github.repository }}" \
|
||||
# '{"version": $version, "appName": $appName, "environment": $environment, "commitSha": $commitSha, "repositoryFullName": $repositoryFullName}' )
|
||||
|
||||
# curl -H "Authorization: ${{ secrets.SWARMIA_DEPLOYMENTS_AUTHORIZATION }}" \
|
||||
# -H "Content-Type: application/json" \
|
||||
# -d "$JSON_STRING" \
|
||||
# https://hook.swarmia.com/deployments
|
||||
# /!\ Don't touch this without checking with integration team members on #discussion-integrations Slack channel
|
||||
- name: Notify meilisearch-kubernetes
|
||||
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event), or if not stable
|
||||
if: ${{ github.event_name == 'push' && matrix.edition == 'community' && steps.check-tag-format.outputs.stable == 'true' }}
|
||||
uses: peter-evans/repository-dispatch@v3
|
||||
with:
|
||||
token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
repository: meilisearch/meilisearch-kubernetes
|
||||
event-type: meilisearch-release
|
||||
client-payload: '{ "version": "${{ github.ref_name }}" }'
|
||||
|
||||
.github/workflows/publish-release-assets.yml (vendored, 178 changes)
@@ -32,157 +32,61 @@ jobs:
|
||||
if: github.event_name == 'release' && steps.check-tag-format.outputs.stable == 'true'
|
||||
run: bash .github/scripts/check-release.sh
|
||||
|
||||
publish-linux:
|
||||
name: Publish binary for Linux
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
asset_name: meilisearch-linux-amd64
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-macos-windows:
|
||||
name: Publish binary for ${{ matrix.os }}
|
||||
publish-binaries:
|
||||
name: Publish binary for ${{ matrix.release }} ${{ matrix.edition }} edition
|
||||
runs-on: ${{ matrix.os }}
|
||||
needs: check-version
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-13, windows-2022]
|
||||
edition: [community, enterprise]
|
||||
release:
|
||||
[macos-amd64, macos-aarch64, windows, linux-amd64, linux-aarch64]
|
||||
include:
|
||||
- os: macos-13
|
||||
artifact_name: meilisearch
|
||||
asset_name: meilisearch-macos-amd64
|
||||
- os: windows-2022
|
||||
artifact_name: meilisearch.exe
|
||||
asset_name: meilisearch-windows-amd64.exe
|
||||
- edition: "community"
|
||||
feature-flag: ""
|
||||
edition-suffix: ""
|
||||
- edition: "enterprise"
|
||||
feature-flag: "--features enterprise"
|
||||
edition-suffix: "enterprise-"
|
||||
- release: macos-amd64
|
||||
os: macos-15-intel
|
||||
binary_path: release/meilisearch
|
||||
asset_name: macos-amd64
|
||||
extra-args: ""
|
||||
- release: macos-aarch64
|
||||
os: macos-14
|
||||
binary_path: aarch64-apple-darwin/release/meilisearch
|
||||
asset_name: macos-apple-silicon
|
||||
extra-args: "--target aarch64-apple-darwin"
|
||||
- release: windows
|
||||
os: windows-2022
|
||||
binary_path: release/meilisearch.exe
|
||||
asset_name: windows-amd64.exe
|
||||
extra-args: ""
|
||||
- release: linux-amd64
|
||||
os: ubuntu-22.04
|
||||
binary_path: x86_64-unknown-linux-gnu/release/meilisearch
|
||||
asset_name: linux-amd64
|
||||
extra-args: "--target x86_64-unknown-linux-gnu"
|
||||
- release: linux-aarch64
|
||||
os: ubuntu-22.04-arm
|
||||
binary_path: aarch64-unknown-linux-gnu/release/meilisearch
|
||||
asset_name: linux-aarch64
|
||||
extra-args: "--target aarch64-unknown-linux-gnu"
|
||||
needs: check-version
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- uses: dtolnay/rust-toolchain@1.91.1
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
run: cargo build --release --locked ${{ matrix.feature-flag }} ${{ matrix.extra-args }}
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-macos-apple-silicon:
|
||||
name: Publish binary for macOS silicon
|
||||
runs-on: macos-13
|
||||
needs: check-version
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- target: aarch64-apple-darwin
|
||||
asset_name: meilisearch-macos-apple-silicon
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
- name: Cargo build
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --release --target ${{ matrix.target }}
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-aarch64:
|
||||
name: Publish binary for aarch64
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- target: aarch64-unknown-linux-gnu
|
||||
asset_name: meilisearch-linux-aarch64
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update -y && apt upgrade -y
|
||||
apt-get install -y curl build-essential gcc-aarch64-linux-gnu
|
||||
- name: Set up Docker for cross compilation
|
||||
run: |
|
||||
apt-get install -y curl apt-transport-https ca-certificates software-properties-common
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
|
||||
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
|
||||
apt-get update -y && apt-get install -y docker-ce
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
- name: Configure target aarch64 GNU
|
||||
## Environment variable is not passed using env:
|
||||
## LD gold won't work with MUSL
|
||||
# env:
|
||||
# JEMALLOC_SYS_WITH_LG_PAGE: 16
|
||||
# RUSTFLAGS: '-Clink-arg=-fuse-ld=gold'
|
||||
run: |
|
||||
echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
|
||||
echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
|
||||
echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
|
||||
- name: Install a default toolchain that will be used to build cargo cross
|
||||
run: |
|
||||
rustup default stable
|
||||
- name: Cargo build
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
use-cross: true
|
||||
args: --release --target ${{ matrix.target }}
|
||||
env:
|
||||
CROSS_DOCKER_IN_DOCKER: true
|
||||
- name: List target output files
|
||||
run: ls -lR ./target
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
file: target/${{ matrix.binary_path }}
|
||||
asset_name: meilisearch-${{ matrix.edition-suffix }}${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-openapi-file:
|
||||
|
||||
.github/workflows/sdks-tests.yml (vendored, 24 changes)

The same one-line change, replacing the service image getmeili/meilisearch with getmeili/meilisearch-enterprise, is repeated in twelve hunks at original lines 68, 92, 122, 149, 184, 213, 238, 263, 284, 307, 338, and 370. Each hunk reads:

@@ -68,7 +68,7 @@ jobs:
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
.github/workflows/test-suite.yml (vendored, 164 changes)
@@ -15,31 +15,40 @@ env:
|
||||
|
||||
jobs:
|
||||
test-linux:
|
||||
name: Tests on ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
name: Tests on Ubuntu
|
||||
runs-on: ${{ matrix.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
runner: [ubuntu-22.04, ubuntu-22.04-arm]
|
||||
features: ["", "--features enterprise"]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Install needed dependencies
|
||||
- name: check free space before
|
||||
run: df -h
|
||||
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
sudo rm -rf "/opt/ghc" || true
|
||||
sudo rm -rf "/usr/share/dotnet" || true
|
||||
sudo rm -rf "/usr/local/lib/android" || true
|
||||
sudo rm -rf "/usr/local/share/boost" || true
|
||||
- name: check free space after
|
||||
run: df -h
|
||||
- name: Setup test with Rust stable
|
||||
uses: dtolnay/rust-toolchain@1.89
|
||||
uses: dtolnay/rust-toolchain@1.91.1
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run cargo check without any default features
|
||||
with:
|
||||
key: ${{ matrix.features }}
|
||||
- name: Run cargo build without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --locked --release --no-default-features --all
|
||||
args: --locked --no-default-features --all
|
||||
- name: Run cargo test
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all
|
||||
args: --locked --all ${{ matrix.features }}
|
||||
|
||||
test-others:
|
||||
name: Tests on ${{ matrix.os }}
|
||||
@@ -47,51 +56,58 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-13, windows-2022]
|
||||
os: [macos-14, windows-2022]
|
||||
features: ["", "--features enterprise"]
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Run cargo check without any default features
|
||||
- uses: dtolnay/rust-toolchain@1.91.1
|
||||
- name: Run cargo build without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --locked --release --no-default-features --all
|
||||
args: --locked --no-default-features --all
|
||||
- name: Run cargo test
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all
|
||||
args: --locked --all ${{ matrix.features }}
|
||||
|
||||
test-all-features:
|
||||
name: Tests almost all features
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
runs-on: ubuntu-22.04
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Install needed dependencies
|
||||
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
sudo rm -rf "/opt/ghc" || true
|
||||
sudo rm -rf "/usr/share/dotnet" || true
|
||||
sudo rm -rf "/usr/local/lib/android" || true
|
||||
sudo rm -rf "/usr/local/share/boost" || true
|
||||
- uses: dtolnay/rust-toolchain@1.91.1
|
||||
- name: Run cargo build with almost all features
|
||||
run: |
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
cargo build --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
- name: Run cargo test with almost all features
|
||||
run: |
|
||||
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
cargo test --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
|
||||
ollama-ubuntu:
|
||||
name: Test with Ollama
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
|
||||
run: |
|
||||
sudo rm -rf "/opt/ghc" || true
|
||||
sudo rm -rf "/usr/share/dotnet" || true
|
||||
sudo rm -rf "/usr/local/lib/android" || true
|
||||
sudo rm -rf "/usr/local/share/boost" || true
|
||||
- name: Install Ollama
|
||||
run: |
|
||||
curl -fsSL https://ollama.com/install.sh | sudo -E sh
|
||||
@@ -115,21 +131,21 @@ jobs:
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all --features test-ollama ollama
|
||||
args: --locked -p meilisearch --features test-ollama ollama
|
||||
|
||||
test-disabled-tokenization:
|
||||
name: Test disabled tokenization
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ubuntu:22.04
|
||||
runs-on: ubuntu-22.04
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Install needed dependencies
|
||||
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
sudo rm -rf "/opt/ghc" || true
|
||||
sudo rm -rf "/usr/share/dotnet" || true
|
||||
sudo rm -rf "/usr/local/lib/android" || true
|
||||
sudo rm -rf "/usr/local/share/boost" || true
|
||||
- uses: dtolnay/rust-toolchain@1.91.1
|
||||
- name: Run cargo tree without default features and check lindera is not present
|
||||
run: |
|
||||
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
|
||||
@@ -140,36 +156,39 @@ jobs:
|
||||
run: |
|
||||
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
|
||||
|
||||
# We run tests in debug also, to make sure that the debug_assertions are hit
|
||||
test-debug:
|
||||
name: Run tests in debug
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
build:
|
||||
name: Build in release
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Install needed dependencies
|
||||
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
sudo rm -rf "/opt/ghc" || true
|
||||
sudo rm -rf "/usr/share/dotnet" || true
|
||||
sudo rm -rf "/usr/local/lib/android" || true
|
||||
sudo rm -rf "/usr/local/share/boost" || true
|
||||
- uses: dtolnay/rust-toolchain@1.91.1
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --all
|
||||
- name: Build
|
||||
run: cargo build --release --locked --target x86_64-unknown-linux-gnu
|
||||
|
||||
clippy:
|
||||
name: Run Clippy
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
features: ["", "--features enterprise"]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
|
||||
run: |
|
||||
sudo rm -rf "/opt/ghc" || true
|
||||
sudo rm -rf "/usr/share/dotnet" || true
|
||||
sudo rm -rf "/usr/local/lib/android" || true
|
||||
sudo rm -rf "/usr/local/share/boost" || true
|
||||
- uses: dtolnay/rust-toolchain@1.91.1
|
||||
with:
|
||||
profile: minimal
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
@@ -177,18 +196,21 @@ jobs:
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: clippy
|
||||
args: --all-targets -- --deny warnings
|
||||
args: --all-targets ${{ matrix.features }} -- --deny warnings
|
||||
|
||||
fmt:
|
||||
name: Run Rustfmt
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
|
||||
run: |
|
||||
sudo rm -rf "/opt/ghc" || true
|
||||
sudo rm -rf "/usr/share/dotnet" || true
|
||||
sudo rm -rf "/usr/local/lib/android" || true
|
||||
sudo rm -rf "/usr/local/share/boost" || true
|
||||
- uses: dtolnay/rust-toolchain@1.91.1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: nightly-2024-07-09
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
@@ -199,3 +221,23 @@ jobs:
|
||||
run: |
|
||||
echo -ne "\n" > crates/benchmarks/benches/datasets_paths.rs
|
||||
cargo fmt --all -- --check
|
||||
|
||||
declarative-tests:
|
||||
name: Run declarative tests
|
||||
runs-on: ubuntu-22.04-arm
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
|
||||
run: |
|
||||
sudo rm -rf "/opt/ghc" || true
|
||||
sudo rm -rf "/usr/share/dotnet" || true
|
||||
sudo rm -rf "/usr/local/lib/android" || true
|
||||
sudo rm -rf "/usr/local/share/boost" || true
|
||||
- uses: dtolnay/rust-toolchain@1.91.1
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run declarative tests
|
||||
run: |
|
||||
cargo xtask test workloads/tests/*.json
|
||||
|
||||
.github/workflows/update-cargo-toml-version.yml (vendored, 10 changes)

@@ -18,9 +18,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Install sd
run: cargo install sd
- name: Update Cargo.toml file
@@ -124,6 +124,7 @@ They are JSON files with the following structure (comments are not actually supp
{
// Name of the workload. Must be unique to the workload, as it will be used to group results on the dashboard.
"name": "hackernews.ndjson_1M,no-threads",
"type": "bench",
// Number of consecutive runs of the commands that should be performed.
// Each run uses a fresh instance of Meilisearch and a fresh database.
// Each run produces its own report file.
Cargo.lock (generated, 1180 changes): file diff suppressed because it is too large.
@@ -23,7 +23,7 @@ members = [
]

[workspace.package]
version = "1.26.0"
version = "1.30.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

@@ -50,3 +50,5 @@ opt-level = 3
opt-level = 3
[profile.dev.package.roaring]
opt-level = 3
[profile.dev.package.gemm-f16]
opt-level = 3
@@ -1,7 +0,0 @@
[build.env]
passthrough = [
"RUST_BACKTRACE",
"CARGO_TERM_COLOR",
"RUSTFLAGS",
"JEMALLOC_SYS_WITH_LG_PAGE"
]
@@ -8,16 +8,14 @@ WORKDIR /
ARG COMMIT_SHA
ARG COMMIT_DATE
ARG GIT_TAG
ARG EXTRA_ARGS
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_DESCRIBE=${GIT_TAG}
ENV RUSTFLAGS="-C target-feature=-crt-static"

COPY . .
RUN set -eux; \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release -p meilisearch -p meilitool
cargo build --release -p meilisearch -p meilitool ${EXTRA_ARGS}

# Run
FROM alpine:3.22
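For context, the new `EXTRA_ARGS` build argument shown above is forwarded to `cargo build` inside the Dockerfile. A minimal sketch of a local invocation, assuming this Dockerfile and an arbitrary local image tag, might look like:

```bash
# Hypothetical local build of the enterprise-flavoured image;
# EXTRA_ARGS is passed through to `cargo build` by the RUN step above.
docker build --build-arg EXTRA_ARGS="--features enterprise" -t meilisearch-enterprise:local .
```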
TESTING.md (new file, 326 lines)
@@ -0,0 +1,326 @@
# Declarative tests

Declarative tests ensure that Meilisearch features remain stable across versions.

While we already have unit tests, those are run against **temporary databases** that are created fresh each time and therefore never risk corruption.

Declarative tests instead **simulate the lifetime of a database**: they chain together commands and requests to change the binary, verifying that database state and API responses remain consistent.

## Basic example

```jsonc
{
  "type": "test",
  "name": "api-keys",
  "binary": { // the first command will run on the binary following this specification.
    "source": "release", // get the binary as a release from GitHub
    "version": "1.19.0", // version to fetch
    "edition": "community" // edition to fetch
  },
  "commands": []
}
```

This example defines a no-op test (it does nothing).

If the file is saved at `workloads/tests/example.json`, you can run it with:

```bash
cargo xtask test workloads/tests/example.json
```

## Commands

Commands represent API requests sent to Meilisearch endpoints during a test.

They are executed sequentially, and their responses can be validated to ensure consistent behavior across upgrades.

```jsonc
{
  "route": "keys",
  "method": "POST",
  "body": {
    "inline": {
      "actions": [
        "search",
        "documents.add"
      ],
      "description": "Test API Key",
      "expiresAt": null,
      "indexes": [ "movies" ]
    }
  }
}
```

This command issues a `POST /keys` request, creating an API key with permissions to search and add documents in the `movies` index.

### Using assets in commands

To keep tests concise and reusable, you can define **assets** at the root of the workload file.

Assets are external data sources (such as datasets) that are cached between runs, making tests faster and easier to read.

```jsonc
{
  "type": "test",
  "name": "movies",
  "binary": {
    "source": "release",
    "version": "1.19.0",
    "edition": "community"
  },
  "assets": {
    "movies.json": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
      "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
    }
  },
  "commands": [
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": {
        "asset": "movies.json"
      }
    }
  ]
}
```

In this example:
- The `movies.json` dataset is defined as an asset, pointing to a remote URL.
- The SHA-256 checksum ensures integrity.
- The `POST /indexes/movies/documents` command uses this asset as the request body.

This makes the test much cleaner than inlining a large dataset directly into the command.

For asset handling, please refer to the [declarative benchmarks documentation](/BENCHMARKS.md#adding-new-assets).
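When declaring a new asset, the `sha256` value has to match the file the URL points to. A minimal sketch, assuming a local copy of the dataset, is to compute it with a standard tool:

```bash
# Compute the checksum to paste into the asset's "sha256" field
# (assumes movies.json was downloaded locally beforehand).
sha256sum movies.json
```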
### Asserting responses

Commands can specify both the **expected status code** and the **expected response body**.

```jsonc
{
  "route": "indexes/movies/documents",
  "method": "POST",
  "body": {
    "asset": "movies.json"
  },
  "expectedStatus": 202,
  "expectedResponse": {
    "enqueuedAt": "[timestamp]", // Set to a bracketed string to ignore the value
    "indexUid": "movies",
    "status": "enqueued",
    "taskUid": 1,
    "type": "documentAdditionOrUpdate"
  },
  "synchronous": "WaitForTask"
}
```

Manually writing `expectedResponse` fields can be tedious.

Instead, you can let the test runner populate them automatically:

```bash
# Run the workload to populate expected fields. Only adds the missing ones, doesn't change existing data
cargo xtask test workloads/tests/example.json --add-missing-responses

# OR

# Run the workload to populate expected fields. Updates all fields including existing ones
cargo xtask test workloads/tests/example.json --update-responses
```

This workflow is recommended:

1. Write the test without expected fields.
2. Run it with `--add-missing-responses` to capture the actual responses.
3. Review and commit the generated expectations.

## Changing binary

It is possible to insert an instruction to change the current Meilisearch instance from one binary specification to another during a test.

When executed, such an instruction will:
1. Stop the current Meilisearch instance.
2. Fetch the binary specified by the instruction.
3. Restart the server with the specified binary on the same database.

```jsonc
{
  "type": "test",
  "name": "movies",
  "binary": {
    "source": "release",
    "version": "1.19.0", // start with version v1.19.0
    "edition": "community"
  },
  "assets": {
    "movies.json": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
      "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
    }
  },
  "commands": [
    // setup some data
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": {
        "asset": "movies.json"
      }
    },
    // switch binary to v1.24.0
    {
      "binary": {
        "source": "release",
        "version": "1.24.0",
        "edition": "community"
      }
    }
  ]
}
```

### Typical Usage

In most cases, the change binary instruction will be used to update a database.

- **Set up** some data using commands on an older version.
- **Upgrade** to the latest version.
- **Assert** that the data and API behavior remain correct after the upgrade.

To properly test the dumpless upgrade, one should typically:

1. Open the database without processing the update task: Use a `binary` instruction to switch to the desired version, passing `--experimental-dumpless-upgrade` and `--experimental-max-number-of-batched-tasks=0` as extra CLI arguments
2. Check that the search, stats and task queue still work.
3. Open the database and process the update task: Use a `binary` instruction to switch to the desired version, passing `--experimental-dumpless-upgrade` as the extra CLI argument. Use a `health` command to wait for the upgrade task to finish.
4. Check that the indexing, search, stats, and task queue still work.

```jsonc
{
  "type": "test",
  "name": "movies",
  "binary": {
    "source": "release",
    "version": "1.12.0",
    "edition": "community"
  },
  "commands": [
    // 0. Run commands to populate the database
    {
      // ..
    },
    // 1. Open the database with new MS without processing the update task
    {
      "binary": {
        "source": "build", // build the binary from the sources in the current git repository
        "edition": "community",
        "extraCliArgs": [
          "--experimental-dumpless-upgrade", // allows to open with a newer MS
          "--experimental-max-number-of-batched-tasks=0" // prevent processing of the update task
        ]
      }
    },
    // 2. Check the search etc.
    {
      // ..
    },
    // 3. Open the database with new MS and processing the update task
    {
      "binary": {
        "source": "build", // build the binary from the sources in the current git repository
        "edition": "community",
        "extraCliArgs": [
          "--experimental-dumpless-upgrade" // allows to open with a newer MS
          // no `--experimental-max-number-of-batched-tasks=0`
        ]
      }
    },
    // 4. Check the indexing, search, etc.
    {
      // ..
    }
  ]
}
```

This ensures backward compatibility: databases created with older Meilisearch versions should remain functional and consistent after an upgrade.

## Variables

Sometimes a command needs to use a value returned by a **previous response**.
These values can be captured and reused using the register field.

```jsonc
{
  "route": "keys",
  "method": "POST",
  "body": {
    "inline": {
      "actions": [
        "search",
        "documents.add"
      ],
      "description": "Test API Key",
      "expiresAt": null,
      "indexes": [ "movies" ]
    }
  },
  "expectedResponse": {
    "key": "c6f64630bad2996b1f675007c8800168e14adf5d6a7bb1a400a6d2b158050eaf",
    // ...
  },
  "register": {
    "key": "/key"
  },
  "synchronous": "WaitForResponse"
}
```

The `register` field captures the value at the JSON path `/key` from the response.
Paths follow the **JavaScript Object Notation Pointer (RFC 6901)** format.
Registered variables are available for all subsequent commands.
Registered variables can be referenced by wrapping their name in double curly braces:
|
||||
|
||||
In the route/path:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"route": "tasks/{{ task_id }}",
|
||||
"method": "GET"
|
||||
}
|
||||
```
|
||||
|
||||
In the request body:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"id": "{{ document_id }}",
|
||||
"overview": "Shazam turns evil and the world is in danger.",
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Or they can be referenced by their name (**without curly braces**) as an API key:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": { /* ... */ },
|
||||
"apiKeyVariable": "key" // The **content** of the key variable will be used as an API key
|
||||
}
|
||||
```
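
Putting both together, a common pattern is to capture an identifier from one response and reuse it in a later command. The sketch below assumes the documents route returns a `taskUid` field, as regular Meilisearch responses do; the exact field name and the follow-up route are illustrative, not prescribed by the workload format.

```jsonc
[
  // enqueue a document addition and capture the returned task uid
  {
    "route": "indexes/movies/documents",
    "method": "POST",
    "body": { "asset": "movies.json" },
    "register": {
      "task_uid": "/taskUid" // JSON Pointer into the response
    },
    "synchronous": "WaitForResponse"
  },
  // later, query that specific task using the registered variable
  {
    "route": "tasks/{{ task_uid }}",
    "method": "GET"
  }
]
```
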
@@ -11,27 +11,27 @@ edition.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.98"
bumpalo = "3.18.1"
csv = "1.3.1"
memmap2 = "0.9.7"
anyhow = "1.0.100"
bumpalo = "3.19.0"
csv = "1.4.0"
memmap2 = "0.9.9"
milli = { path = "../milli" }
mimalloc = { version = "0.1.47", default-features = false }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tempfile = "3.20.0"
mimalloc = { version = "0.1.48", default-features = false }
serde_json = { version = "1.0.145", features = ["preserve_order"] }
tempfile = "3.23.0"

[dev-dependencies]
criterion = { version = "0.6.0", features = ["html_reports"] }
criterion = { version = "0.7.0", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.12"

[build-dependencies]
anyhow = "1.0.98"
bytes = "1.10.1"
convert_case = "0.8.0"
flate2 = "1.1.2"
reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
anyhow = "1.0.100"
bytes = "1.11.0"
convert_case = "0.9.0"
flate2 = "1.1.5"
reqwest = { version = "0.12.24", features = ["blocking", "rustls-tls"], default-features = false }

[features]
default = ["milli/all-tokenizations"]

@@ -21,6 +21,10 @@ use roaring::RoaringBitmap;
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
fn no_cancel() -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
const BENCHMARK_ITERATION: usize = 10;
|
||||
|
||||
fn setup_dir(path: impl AsRef<Path>) {
|
||||
@@ -65,7 +69,7 @@ fn setup_settings<'t>(
|
||||
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_sortable_fields(sortable_fields);
|
||||
|
||||
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
|
||||
builder.execute(&no_cancel, &Progress::default(), Default::default()).unwrap();
|
||||
}
|
||||
|
||||
fn setup_index_with_settings(
|
||||
@@ -152,7 +156,7 @@ fn indexing_songs_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -168,7 +172,7 @@ fn indexing_songs_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -220,7 +224,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -236,7 +240,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -266,7 +270,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -282,7 +286,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -336,7 +340,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -352,7 +356,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -414,7 +418,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -430,7 +434,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -460,7 +464,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -476,7 +480,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -502,7 +506,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -518,7 +522,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -571,7 +575,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -587,7 +591,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -639,7 +643,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -655,7 +659,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -707,7 +711,7 @@ fn indexing_wiki(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -723,7 +727,7 @@ fn indexing_wiki(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -774,7 +778,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -790,7 +794,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -820,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -836,7 +840,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -889,7 +893,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -905,7 +909,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -967,7 +971,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -983,7 +987,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1014,7 +1018,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1030,7 +1034,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1057,7 +1061,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1073,7 +1077,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1125,7 +1129,7 @@ fn indexing_movies_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1141,7 +1145,7 @@ fn indexing_movies_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1192,7 +1196,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1208,7 +1212,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1238,7 +1242,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1254,7 +1258,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1307,7 +1311,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1323,7 +1327,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1372,7 +1376,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
|
||||
Some(primary_key),
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1422,7 +1426,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1438,7 +1442,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1468,7 +1472,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1484,7 +1488,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1510,7 +1514,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1526,7 +1530,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1601,7 +1605,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1617,7 +1621,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1693,7 +1697,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1709,7 +1713,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1777,7 +1781,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1793,7 +1797,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1845,7 +1849,7 @@ fn indexing_geo(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1861,7 +1865,7 @@ fn indexing_geo(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1912,7 +1916,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1928,7 +1932,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -1958,7 +1962,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -1974,7 +1978,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
@@ -2027,7 +2031,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
@@ -2043,7 +2047,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
||||
primary_key,
|
||||
&document_changes,
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&no_cancel,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
|
||||
@@ -11,8 +11,8 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
time = { version = "0.3.41", features = ["parsing"] }
|
||||
time = { version = "0.3.44", features = ["parsing"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.98"
|
||||
vergen-git2 = "1.0.7"
|
||||
anyhow = "1.0.100"
|
||||
vergen-gitcl = "1.0.8"
|
||||
|
||||
@@ -15,7 +15,7 @@ fn emit_git_variables() -> anyhow::Result<()> {
|
||||
// Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
|
||||
// in the corresponding GitHub workflow (publish_docker.yml).
|
||||
// This is due to the Dockerfile building the binary outside of the git directory.
|
||||
let mut builder = vergen_git2::Git2Builder::default();
|
||||
let mut builder = vergen_gitcl::GitclBuilder::default();
|
||||
|
||||
builder.branch(true);
|
||||
builder.commit_timestamp(true);
|
||||
@@ -25,5 +25,5 @@ fn emit_git_variables() -> anyhow::Result<()> {
|
||||
|
||||
let git2 = builder.build()?;
|
||||
|
||||
vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
|
||||
vergen_gitcl::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
|
||||
}
|
||||
|
||||
crates/build-info/src/main.rs (new file)
@@ -0,0 +1,6 @@
|
||||
use build_info::BuildInfo;
|
||||
|
||||
fn main() {
|
||||
let info = BuildInfo::from_build();
|
||||
dbg!(info);
|
||||
}
|
||||
@@ -11,24 +11,27 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
flate2 = "1.1.2"
|
||||
anyhow = "1.0.100"
|
||||
flate2 = "1.1.5"
|
||||
http = "1.3.1"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.21.3"
|
||||
regex = "1.11.1"
|
||||
regex = "1.12.2"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
|
||||
[features]
|
||||
enterprise = ["meilisearch-types/enterprise"]
|
||||
@@ -9,8 +9,9 @@ use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::settings::Unchecked;
|
||||
use meilisearch_types::tasks::network::{DbTaskNetwork, NetworkTopologyChange};
|
||||
use meilisearch_types::tasks::{
|
||||
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, TaskNetwork,
|
||||
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
|
||||
};
|
||||
use meilisearch_types::InstanceUid;
|
||||
use roaring::RoaringBitmap;
|
||||
@@ -95,7 +96,7 @@ pub struct TaskDump {
|
||||
)]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
pub network: Option<DbTaskNetwork>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
}
|
||||
@@ -163,6 +164,7 @@ pub enum KindDump {
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
},
|
||||
NetworkTopologyChange(NetworkTopologyChange),
|
||||
}
|
||||
|
||||
impl From<Task> for TaskDump {
|
||||
@@ -249,6 +251,9 @@ impl From<KindWithContent> for KindDump {
|
||||
KindWithContent::IndexCompaction { index_uid } => {
|
||||
KindDump::IndexCompaction { index_uid }
|
||||
}
|
||||
KindWithContent::NetworkTopologyChange(network_topology_change) => {
|
||||
KindDump::NetworkTopologyChange(network_topology_change)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -262,13 +267,13 @@ pub(crate) mod test {
|
||||
use big_s::S;
|
||||
use maplit::{btreemap, btreeset};
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
|
||||
use meilisearch_types::enterprise_edition::network::{Network, Remote};
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::{Action, Key};
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::{self, FilterableAttributesRule};
|
||||
use meilisearch_types::network::{Network, Remote};
|
||||
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
|
||||
use meilisearch_types::task_view::DetailsView;
|
||||
use meilisearch_types::tasks::{BatchStopReason, Details, Kind, Status};
|
||||
@@ -560,7 +565,8 @@ pub(crate) mod test {
|
||||
Network {
|
||||
local: Some("myself".to_string()),
|
||||
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
|
||||
sharding: false,
|
||||
leader: None,
|
||||
version: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -614,7 +620,10 @@ pub(crate) mod test {
|
||||
assert_eq!(dump.features().unwrap().unwrap(), expected);
|
||||
|
||||
// ==== checking the network
|
||||
let expected = create_test_network();
|
||||
let mut expected = create_test_network();
|
||||
// from v1.29, we drop `leader` and `local` on import
|
||||
expected.leader = None;
|
||||
expected.local = None;
|
||||
assert_eq!(&expected, dump.network().unwrap().unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -434,7 +434,11 @@ pub(crate) mod test {
|
||||
// network
|
||||
|
||||
let network = dump.network().unwrap().unwrap();
|
||||
insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
|
||||
|
||||
// since v1.29 we are dropping `local` and `leader` on import
|
||||
insta::assert_snapshot!(network.local.is_none(), @"true");
|
||||
insta::assert_snapshot!(network.leader.is_none(), @"true");
|
||||
|
||||
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");
|
||||
|
||||
@@ -107,19 +107,14 @@ impl Settings<Unchecked> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[derive(Default, Debug, Clone, PartialEq)]
|
||||
pub enum Setting<T> {
|
||||
Set(T),
|
||||
Reset,
|
||||
#[default]
|
||||
NotSet,
|
||||
}
|
||||
|
||||
impl<T> Default for Setting<T> {
|
||||
fn default() -> Self {
|
||||
Self::NotSet
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Setting<T> {
|
||||
pub const fn is_not_set(&self) -> bool {
|
||||
matches!(self, Self::NotSet)
|
||||
|
||||
@@ -161,19 +161,14 @@ pub struct Facets {
|
||||
pub min_level_size: Option<NonZeroUsize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Setting<T> {
|
||||
Set(T),
|
||||
Reset,
|
||||
#[default]
|
||||
NotSet,
|
||||
}
|
||||
|
||||
impl<T> Default for Setting<T> {
|
||||
fn default() -> Self {
|
||||
Self::NotSet
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Setting<T> {
|
||||
pub fn map<U, F>(self, f: F) -> Setting<U>
|
||||
where
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::marker::PhantomData;
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::de::Visitor;
|
||||
use serde::{Deserialize, Deserializer};
|
||||
use serde::Deserialize;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::settings::{Settings, Unchecked};
|
||||
@@ -82,59 +80,3 @@ impl Display for IndexUidFormatError {
|
||||
}
|
||||
|
||||
impl std::error::Error for IndexUidFormatError {}
|
||||
|
||||
/// A type that tries to match either a star (*) or
|
||||
/// any other thing that implements `FromStr`.
|
||||
#[derive(Debug)]
|
||||
#[cfg_attr(test, derive(serde::Serialize))]
|
||||
pub enum StarOr<T> {
|
||||
Star,
|
||||
Other(T),
|
||||
}
|
||||
|
||||
impl<'de, T, E> Deserialize<'de> for StarOr<T>
|
||||
where
|
||||
T: FromStr<Err = E>,
|
||||
E: Display,
|
||||
{
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
/// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag.
|
||||
/// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to
|
||||
/// deserialize everything as a `StarOr::Other`, including "*".
|
||||
/// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is
|
||||
/// not supported on untagged enums.
|
||||
struct StarOrVisitor<T>(PhantomData<T>);
|
||||
|
||||
impl<T, FE> Visitor<'_> for StarOrVisitor<T>
|
||||
where
|
||||
T: FromStr<Err = FE>,
|
||||
FE: Display,
|
||||
{
|
||||
type Value = StarOr<T>;
|
||||
|
||||
fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
|
||||
formatter.write_str("a string")
|
||||
}
|
||||
|
||||
fn visit_str<SE>(self, v: &str) -> Result<Self::Value, SE>
|
||||
where
|
||||
SE: serde::de::Error,
|
||||
{
|
||||
match v {
|
||||
"*" => Ok(StarOr::Star),
|
||||
v => {
|
||||
let other = FromStr::from_str(v).map_err(|e: T::Err| {
|
||||
SE::custom(format!("Invalid `other` value: {}", e))
|
||||
})?;
|
||||
Ok(StarOr::Other(other))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_str(StarOrVisitor(PhantomData))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -192,19 +192,14 @@ pub struct Facets {
|
||||
pub min_level_size: Option<NonZeroUsize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)]
|
||||
pub enum Setting<T> {
|
||||
Set(T),
|
||||
Reset,
|
||||
#[default]
|
||||
NotSet,
|
||||
}
|
||||
|
||||
impl<T> Default for Setting<T> {
|
||||
fn default() -> Self {
|
||||
Self::NotSet
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Setting<T> {
|
||||
pub fn set(self) -> Option<T> {
|
||||
match self {
|
||||
|
||||
@@ -47,20 +47,15 @@ pub struct Settings<T> {
|
||||
pub _kind: PhantomData<T>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)]
|
||||
#[cfg_attr(test, derive(serde::Serialize))]
|
||||
pub enum Setting<T> {
|
||||
Set(T),
|
||||
Reset,
|
||||
#[default]
|
||||
NotSet,
|
||||
}
|
||||
|
||||
impl<T> Default for Setting<T> {
|
||||
fn default() -> Self {
|
||||
Self::NotSet
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Setting<T> {
|
||||
pub fn set(self) -> Option<T> {
|
||||
match self {
|
||||
|
||||
@@ -322,7 +322,7 @@ impl From<Task> for TaskView {
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts));
|
||||
let duration = finished_at.zip(started_at).map(|(tf, ts)| tf - ts);
|
||||
|
||||
Self {
|
||||
uid: id,
|
||||
|
||||
@@ -24,7 +24,7 @@ pub type Batch = meilisearch_types::batches::Batch;
|
||||
pub type Key = meilisearch_types::keys::Key;
|
||||
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
|
||||
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
pub type Network = meilisearch_types::enterprise_edition::network::Network;
|
||||
pub type Network = meilisearch_types::network::Network;
|
||||
pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView;
|
||||
|
||||
// ===== Other types to clarify the code of the compat module
|
||||
@@ -95,17 +95,26 @@ impl V6Reader {
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
let network = match fs::read(dump.path().join("network.json")) {
|
||||
Ok(network_file) => Some(serde_json::from_reader(&*network_file)?),
|
||||
Err(error) => match error.kind() {
|
||||
// Allows the file to be missing, this will only result in all experimental features disabled.
|
||||
ErrorKind::NotFound => {
|
||||
debug!("`network.json` not found in dump");
|
||||
None
|
||||
}
|
||||
_ => return Err(error.into()),
|
||||
},
|
||||
};
|
||||
let mut network: Option<meilisearch_types::network::Network> =
|
||||
match fs::read(dump.path().join("network.json")) {
|
||||
Ok(network_file) => Some(serde_json::from_reader(&*network_file)?),
|
||||
Err(error) => match error.kind() {
|
||||
// Allows the file to be missing, this will only result in all experimental features disabled.
|
||||
ErrorKind::NotFound => {
|
||||
debug!("`network.json` not found in dump");
|
||||
None
|
||||
}
|
||||
_ => return Err(error.into()),
|
||||
},
|
||||
};
|
||||
|
||||
if let Some(network) = &mut network {
|
||||
// as dumps are typically imported in a different machine as the emitter (otherwise dumpless upgrade would be used),
|
||||
// we decide to remove the self to avoid alias issues
|
||||
network.local = None;
|
||||
// for the same reason we disable automatic sharding
|
||||
network.leader = None;
|
||||
}
|
||||
|
||||
let webhooks = match fs::read(dump.path().join("webhooks.json")) {
|
||||
Ok(webhooks_file) => Some(serde_json::from_reader(&*webhooks_file)?),
|
||||
|
||||
@@ -5,9 +5,9 @@ use std::path::PathBuf;
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::enterprise_edition::network::Network;
|
||||
use meilisearch_types::features::{ChatCompletionSettings, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::network::Network;
|
||||
use meilisearch_types::settings::{Checked, Settings};
|
||||
use meilisearch_types::webhooks::WebhooksDumpView;
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
@@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
@@ -16,7 +16,7 @@ license.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.6.0", features = ["html_reports"] }
|
||||
criterion = { version = "0.7.0", features = ["html_reports"] }
|
||||
|
||||
[[bench]]
|
||||
name = "benchmarks"
|
||||
|
||||
@@ -11,12 +11,12 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.4.1", features = ["derive"] }
|
||||
bumpalo = "3.18.1"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
arbitrary = { version = "1.4.2", features = ["derive"] }
|
||||
bumpalo = "3.19.0"
|
||||
clap = { version = "4.5.52", features = ["derive"] }
|
||||
either = "1.15.0"
|
||||
fastrand = "2.3.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tempfile = "3.20.0"
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tempfile = "3.23.0"
|
||||
|
||||
@@ -11,33 +11,34 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
anyhow = "1.0.100"
|
||||
bincode = "1.3.3"
|
||||
byte-unit = "5.1.6"
|
||||
bytes = "1.10.1"
|
||||
bumpalo = "3.18.1"
|
||||
bytes = "1.11.0"
|
||||
bumpalo = "3.19.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.8.0"
|
||||
csv = "1.3.1"
|
||||
convert_case = "0.9.0"
|
||||
csv = "1.4.0"
|
||||
derive_builder = "0.20.2"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "2.1.0"
|
||||
enum-iterator = "2.3.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.2"
|
||||
indexmap = "2.9.0"
|
||||
flate2 = "1.1.5"
|
||||
hashbrown = "0.15.5"
|
||||
indexmap = "2.12.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.7"
|
||||
memmap2 = "0.9.9"
|
||||
page_size = "0.6.0"
|
||||
rayon = "1.10.0"
|
||||
rayon = "1.11.0"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
@@ -45,11 +46,15 @@ time = { version = "0.3.41", features = [
|
||||
] }
|
||||
tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
backoff = "0.4.0"
|
||||
reqwest = { version = "0.12.23", features = ["rustls-tls", "http2"], default-features = false }
|
||||
reqwest = { version = "0.12.24", features = [
|
||||
"rustls-tls",
|
||||
"http2",
|
||||
], default-features = false }
|
||||
rusty-s3 = "0.8.1"
|
||||
tokio = { version = "1.47.1", features = ["full"] }
|
||||
tokio = { version = "1.48.0", features = ["full"] }
|
||||
urlencoding = "2.1.3"
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
@@ -58,3 +63,6 @@ crossbeam-channel = "0.5.15"
|
||||
insta = { version = "=1.39.0", features = ["json", "redactions"] }
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
|
||||
[features]
|
||||
enterprise = ["meilisearch-types/enterprise"]
|
||||
|
||||
@@ -238,6 +238,9 @@ impl<'a> Dump<'a> {
|
||||
KindDump::IndexCompaction { index_uid } => {
|
||||
KindWithContent::IndexCompaction { index_uid }
|
||||
}
|
||||
KindDump::NetworkTopologyChange(network_topology_change) => {
|
||||
KindWithContent::NetworkTopologyChange(network_topology_change)
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -3,10 +3,13 @@ use std::fmt::Display;
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::milli::index::RollbackOutcome;
|
||||
use meilisearch_types::milli::DocumentId;
|
||||
use meilisearch_types::tasks::network::ReceiveTaskError;
|
||||
use meilisearch_types::tasks::{Kind, Status};
|
||||
use meilisearch_types::{heed, milli};
|
||||
use reqwest::StatusCode;
|
||||
use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::TaskId;
|
||||
|
||||
@@ -191,6 +194,17 @@ pub enum Error {
|
||||
#[error(transparent)]
|
||||
HeedTransaction(heed::Error),
|
||||
|
||||
#[error("No network topology change task is currently enqueued or processing")]
|
||||
ImportTaskWithoutNetworkTask,
|
||||
#[error("The network task version (`{network_task}`) does not match the import task version (`{import_task}`)")]
|
||||
NetworkVersionMismatch { network_task: Uuid, import_task: Uuid },
|
||||
#[error("The import task emanates from an unknown remote `{0}`")]
|
||||
ImportTaskUnknownRemote(String),
|
||||
#[error("The import task with key `{0}` was already received")]
|
||||
ImportTaskAlreadyReceived(DocumentId),
|
||||
#[error("{action} requires the Enterprise Edition")]
|
||||
RequiresEnterpriseEdition { action: &'static str },
|
||||
|
||||
#[cfg(test)]
|
||||
#[error("Planned failure for tests.")]
|
||||
PlannedFailure,
|
||||
@@ -248,6 +262,11 @@ impl Error {
|
||||
| Error::Persist(_)
|
||||
| Error::FeatureNotEnabled(_)
|
||||
| Error::Export(_)
|
||||
| Error::ImportTaskWithoutNetworkTask
|
||||
| Error::NetworkVersionMismatch { .. }
|
||||
| Error::ImportTaskAlreadyReceived(_)
|
||||
| Error::ImportTaskUnknownRemote(_)
|
||||
| Error::RequiresEnterpriseEdition { .. }
|
||||
| Error::Anyhow(_) => true,
|
||||
Error::CreateBatch(_)
|
||||
| Error::CorruptedTaskQueue
|
||||
@@ -307,6 +326,11 @@ impl ErrorCode for Error {
|
||||
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
|
||||
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
|
||||
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
|
||||
Error::ImportTaskWithoutNetworkTask => Code::ImportTaskWithoutNetworkTask,
|
||||
Error::NetworkVersionMismatch { .. } => Code::NetworkVersionMismatch,
|
||||
Error::ImportTaskAlreadyReceived(_) => Code::ImportTaskAlreadyReceived,
|
||||
Error::ImportTaskUnknownRemote(_) => Code::ImportTaskUnknownRemote,
|
||||
Error::RequiresEnterpriseEdition { .. } => Code::RequiresEnterpriseEdition,
|
||||
Error::S3Error { status, .. } if status.is_client_error() => {
|
||||
Code::InvalidS3SnapshotRequest
|
||||
}
|
||||
@@ -345,3 +369,12 @@ impl ErrorCode for Error {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ReceiveTaskError> for Error {
|
||||
fn from(value: ReceiveTaskError) -> Self {
|
||||
match value {
|
||||
ReceiveTaskError::UnknownRemote(unknown) => Error::ImportTaskUnknownRemote(unknown),
|
||||
ReceiveTaskError::DuplicateTask(dup) => Error::ImportTaskAlreadyReceived(dup),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use meilisearch_types::enterprise_edition::network::Network;
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
|
||||
use meilisearch_types::network::Network;
|
||||
|
||||
use crate::error::FeatureNotEnabledError;
|
||||
use crate::Result;
|
||||
@@ -38,6 +38,10 @@ impl RoFeatures {
|
||||
Self { runtime }
|
||||
}
|
||||
|
||||
pub fn from_runtime_features(features: RuntimeTogglableFeatures) -> Self {
|
||||
Self { runtime: features }
|
||||
}
|
||||
|
||||
pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
|
||||
self.runtime
|
||||
}
|
||||
|
||||
@@ -361,6 +361,12 @@ impl IndexMapper {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// The number of indexes in the database
|
||||
#[cfg(feature = "enterprise")] // only used in enterprise edition for now
|
||||
pub fn index_count(&self, rtxn: &RoTxn) -> Result<u64> {
|
||||
Ok(self.index_mapping.len(rtxn)?)
|
||||
}
|
||||
|
||||
/// Return an index, may open it if it wasn't already opened.
|
||||
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
|
||||
if let Some((current_name, current_index)) =
|
||||
|
||||
@@ -6,7 +6,7 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Details, Kind, Status, Task};
|
||||
use meilisearch_types::versioning;
|
||||
use meilisearch_types::versioning::{self, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::index_mapper::IndexMapper;
|
||||
@@ -27,6 +27,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
queue,
|
||||
scheduler,
|
||||
persisted,
|
||||
export_default_payload_size_bytes: _,
|
||||
|
||||
index_mapper,
|
||||
features: _,
|
||||
@@ -320,11 +321,18 @@ fn snapshot_details(d: &Details) -> String {
|
||||
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
|
||||
}
|
||||
Details::UpgradeDatabase { from, to } => {
|
||||
format!("{{ from: {from:?}, to: {to:?} }}")
|
||||
if to == &(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) {
|
||||
format!("{{ from: {from:?}, to: [current version] }}")
|
||||
} else {
|
||||
format!("{{ from: {from:?}, to: {to:?} }}")
|
||||
}
|
||||
}
|
||||
Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
|
||||
format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
|
||||
}
|
||||
Details::NetworkTopologyChange { moved_documents, message } => {
|
||||
format!("{{ moved_documents: {moved_documents:?}, message: {message:?}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -400,7 +408,21 @@ pub fn snapshot_batch(batch: &Batch) -> String {
|
||||
|
||||
snap.push('{');
|
||||
snap.push_str(&format!("uid: {uid}, "));
|
||||
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
|
||||
let details = if let Some(upgrade_to) = &details.upgrade_to {
|
||||
if upgrade_to.as_str()
|
||||
== format!("v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}").as_str()
|
||||
{
|
||||
let mut details = details.clone();
|
||||
|
||||
details.upgrade_to = Some("[current version]".into());
|
||||
serde_json::to_string(&details).unwrap()
|
||||
} else {
|
||||
serde_json::to_string(details).unwrap()
|
||||
}
|
||||
} else {
|
||||
serde_json::to_string(details).unwrap()
|
||||
};
|
||||
snap.push_str(&format!("details: {details}, "));
|
||||
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
|
||||
if !embedder_stats.skip_serializing() {
|
||||
snap.push_str(&format!(
|
||||
|
||||
@@ -48,13 +48,13 @@ use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::time::Duration;
|
||||
|
||||
use byte_unit::Byte;
|
||||
use dump::Dump;
|
||||
pub use error::Error;
|
||||
pub use features::RoFeatures;
|
||||
use flate2::bufread::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::enterprise_edition::network::Network;
|
||||
use meilisearch_types::features::{
|
||||
ChatCompletionSettings, InstanceTogglableFeatures, RuntimeTogglableFeatures,
|
||||
};
|
||||
@@ -67,11 +67,14 @@ use meilisearch_types::milli::vector::{
|
||||
Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
|
||||
};
|
||||
use meilisearch_types::milli::{self, Index};
|
||||
use meilisearch_types::network::Network;
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork};
|
||||
use meilisearch_types::tasks::network::{
|
||||
DbTaskNetwork, ImportData, ImportMetadata, Origin, TaskNetwork,
|
||||
};
|
||||
use meilisearch_types::tasks::{KindWithContent, Task};
|
||||
use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
use processing::ProcessingTasks;
|
||||
pub use queue::Query;
|
||||
use queue::Queue;
|
||||
use roaring::RoaringBitmap;
|
||||
@@ -82,6 +85,7 @@ use uuid::Uuid;
|
||||
use versioning::Versioning;
|
||||
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::clamp_to_page_size;
|
||||
|
||||
pub(crate) type BEI128 = I128<BE>;
|
||||
@@ -144,9 +148,11 @@ pub struct IndexSchedulerOptions {
|
||||
/// If the autobatcher is allowed to automatically batch tasks
|
||||
/// it will only batch this defined maximum size (in bytes) of tasks at once.
|
||||
pub batched_tasks_size_limit: u64,
|
||||
/// The maximum size of the default payload for exporting documents, in bytes
|
||||
pub export_default_payload_size_bytes: Byte,
|
||||
/// The experimental features enabled for this instance.
|
||||
pub instance_features: InstanceTogglableFeatures,
|
||||
/// The experimental features enabled for this instance.
|
||||
/// Whether the index scheduler is able to auto upgrade or not.
|
||||
pub auto_upgrade: bool,
|
||||
/// The maximal number of entries in the search query cache of an embedder.
|
||||
///
|
||||
@@ -199,6 +205,9 @@ pub struct IndexScheduler {
|
||||
/// to the same embeddings for the same input text.
|
||||
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
|
||||
|
||||
/// The maximum size of the default payload for exporting documents, in bytes
|
||||
pub export_default_payload_size_bytes: Byte,
|
||||
|
||||
// ================= test
|
||||
// The next entry is dedicated to the tests.
|
||||
/// Provide a way to set a breakpoint in multiple part of the scheduler.
|
||||
@@ -234,6 +243,7 @@ impl IndexScheduler {
|
||||
cleanup_enabled: self.cleanup_enabled,
|
||||
experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
|
||||
persisted: self.persisted,
|
||||
export_default_payload_size_bytes: self.export_default_payload_size_bytes,
|
||||
|
||||
webhooks: self.webhooks.clone(),
|
||||
embedders: self.embedders.clone(),
|
||||
@@ -345,6 +355,7 @@ impl IndexScheduler {
|
||||
persisted,
|
||||
webhooks: Arc::new(webhooks),
|
||||
embedders: Default::default(),
|
||||
export_default_payload_size_bytes: options.export_default_payload_size_bytes,
|
||||
|
||||
#[cfg(test)] // Will be replaced in `new_tests` in test environments
|
||||
test_breakpoint_sdr: crossbeam_channel::bounded(0).0,
|
||||
@@ -700,14 +711,14 @@ impl IndexScheduler {
|
||||
self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing)
|
||||
}
|
||||
|
||||
pub fn set_task_network(&self, task_id: TaskId, network: TaskNetwork) -> Result<()> {
|
||||
pub fn set_task_network(&self, task_id: TaskId, network: DbTaskNetwork) -> Result<Task> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?;
|
||||
task.network = Some(network);
|
||||
self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?;
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Return the batches matching the query from the user's point of view along
|
||||
@@ -757,18 +768,30 @@ impl IndexScheduler {
|
||||
task_id: Option<TaskId>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
self.register_with_custom_metadata(kind, task_id, None, dry_run)
|
||||
self.register_with_custom_metadata(kind, task_id, None, dry_run, None)
|
||||
}
|
||||
|
||||
/// Register a new task in the scheduler, with metadata.
|
||||
///
|
||||
/// If it fails and data was associated with the task, it tries to delete the associated data.
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// - task_network: network of the task to check.
|
||||
///
|
||||
/// If the task is an import task, only accept it if:
|
||||
///
|
||||
/// 1. There is an ongoing network topology change task
|
||||
/// 2. The task to register matches the network version of the network topology change task
|
||||
///
|
||||
/// Always accept the task if it is not an import task.
|
||||
pub fn register_with_custom_metadata(
|
||||
&self,
|
||||
kind: KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
custom_metadata: Option<String>,
|
||||
dry_run: bool,
|
||||
task_network: Option<TaskNetwork>,
|
||||
) -> Result<Task> {
|
||||
// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
|
||||
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
|
||||
@@ -779,7 +802,19 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let task = self.queue.register(&mut wtxn, &kind, task_id, custom_metadata, dry_run)?;
|
||||
|
||||
if let Some(TaskNetwork::Import { import_from, network_change, metadata }) = &task_network {
|
||||
self.update_network_task(&mut wtxn, import_from, network_change, metadata)?;
|
||||
}
|
||||
|
||||
let task = self.queue.register(
|
||||
&mut wtxn,
|
||||
&kind,
|
||||
task_id,
|
||||
custom_metadata,
|
||||
dry_run,
|
||||
task_network.map(DbTaskNetwork::from),
|
||||
)?;
|
||||
|
||||
// If the registered task is a task cancelation
|
||||
// we inform the processing tasks to stop (if necessary).
|
||||
@@ -801,6 +836,91 @@ impl IndexScheduler {
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
pub fn network_no_index_for_remote(
|
||||
&self,
|
||||
remote_name: String,
|
||||
origin: Origin,
|
||||
) -> Result<(), Error> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
|
||||
self.update_network_task(
|
||||
&mut wtxn,
|
||||
&ImportData { remote_name, index_name: None, document_count: 0 },
|
||||
&origin,
|
||||
&ImportMetadata { index_count: 0, task_key: None, total_index_documents: 0 },
|
||||
)?;
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
// wake up the scheduler as the task state has changed
|
||||
self.scheduler.wake_up.signal();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_network_task(
|
||||
&self,
|
||||
wtxn: &mut heed::RwTxn<'_>,
|
||||
import_from: &ImportData,
|
||||
network_change: &Origin,
|
||||
metadata: &ImportMetadata,
|
||||
) -> Result<(), Error> {
|
||||
let mut network_tasks = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_kind(&*wtxn, meilisearch_types::tasks::Kind::NetworkTopologyChange)?;
|
||||
if network_tasks.is_empty() {
|
||||
return Err(Error::ImportTaskWithoutNetworkTask);
|
||||
}
|
||||
let network_task = {
|
||||
let processing = self.processing_tasks.read().unwrap().processing.clone();
|
||||
if processing.is_disjoint(&network_tasks) {
|
||||
let enqueued = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_status(&*wtxn, meilisearch_types::tasks::Status::Enqueued)?;
|
||||
|
||||
network_tasks &= enqueued;
|
||||
if let Some(network_task) = network_tasks.into_iter().next() {
|
||||
network_task
|
||||
} else {
|
||||
return Err(Error::ImportTaskWithoutNetworkTask);
|
||||
}
|
||||
} else {
|
||||
network_tasks &= &*processing;
|
||||
network_tasks.into_iter().next().unwrap()
|
||||
}
|
||||
};
|
||||
let mut network_task = self.queue.tasks.get_task(&*wtxn, network_task)?.unwrap();
|
||||
let network_task_version = network_task
|
||||
.network
|
||||
.as_ref()
|
||||
.map(|network| network.network_version())
|
||||
.unwrap_or_default();
|
||||
if network_task_version != network_change.network_version {
|
||||
return Err(Error::NetworkVersionMismatch {
|
||||
network_task: network_task_version,
|
||||
import_task: network_change.network_version,
|
||||
});
|
||||
}
|
||||
let KindWithContent::NetworkTopologyChange(network_topology_change) =
|
||||
&mut network_task.kind
|
||||
else {
|
||||
tracing::error!("unexpected network kind for network task while registering task");
|
||||
return Err(Error::CorruptedTaskQueue);
|
||||
};
|
||||
network_topology_change.receive_remote_task(
|
||||
&import_from.remote_name,
|
||||
import_from.index_name.as_deref(),
|
||||
metadata.task_key,
|
||||
import_from.document_count,
|
||||
metadata.index_count,
|
||||
metadata.total_index_documents,
|
||||
)?;
|
||||
self.queue.tasks.update_task(wtxn, &mut network_task)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register a new task coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_task(&mut self) -> Result<Dump<'_>> {
|
||||
|
||||
@@ -42,12 +42,10 @@ impl ProcessingTasks {
|
||||
|
||||
/// Set the processing tasks to an empty list
|
||||
pub fn stop_processing(&mut self) -> Self {
|
||||
self.progress = None;
|
||||
|
||||
Self {
|
||||
batch: std::mem::take(&mut self.batch),
|
||||
processing: std::mem::take(&mut self.processing),
|
||||
progress: None,
|
||||
progress: std::mem::take(&mut self.progress),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ use file_store::FileStore;
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::network::DbTaskNetwork;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
@@ -259,6 +260,7 @@ impl Queue {
|
||||
task_id: Option<TaskId>,
|
||||
custom_metadata: Option<String>,
|
||||
dry_run: bool,
|
||||
network: Option<DbTaskNetwork>,
|
||||
) -> Result<Task> {
|
||||
let next_task_id = self.tasks.next_task_id(wtxn)?;
|
||||
|
||||
@@ -280,7 +282,7 @@ impl Queue {
|
||||
details: kind.default_details(),
|
||||
status: Status::Enqueued,
|
||||
kind: kind.clone(),
|
||||
network: None,
|
||||
network,
|
||||
custom_metadata,
|
||||
};
|
||||
// For deletion and cancelation tasks, we want to make extra sure that they
|
||||
@@ -348,6 +350,7 @@ impl Queue {
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
None,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -3,7 +3,8 @@ use std::ops::{Bound, RangeBounds};
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, Status, Task};
|
||||
use meilisearch_types::tasks::network::DbTaskNetwork;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@@ -114,14 +115,15 @@ impl TaskQueue {
|
||||
/// - CorruptedTaskQueue: The task doesn't exist in the database
|
||||
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &mut Task) -> Result<()> {
|
||||
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
let reprocessing = old_task.status != Status::Enqueued;
|
||||
// network topology tasks may be processed multiple times.
|
||||
let maybe_reprocessing = old_task.status != Status::Enqueued
|
||||
|| task.kind.as_kind() == Kind::NetworkTopologyChange;
|
||||
|
||||
debug_assert!(old_task != *task);
|
||||
debug_assert_eq!(old_task.uid, task.uid);
|
||||
|
||||
// If we're reprocessing a task that failed, it may already contain a batch_uid
|
||||
debug_assert!(
|
||||
reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
|
||||
maybe_reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
|
||||
"\n==> old: {old_task:?}\n==> new: {task:?}"
|
||||
);
|
||||
|
||||
@@ -143,13 +145,24 @@ impl TaskQueue {
|
||||
})?;
|
||||
}
|
||||
|
||||
// Avoids rewriting part of the network topology change because of TOCTOU errors
|
||||
if let (
|
||||
KindWithContent::NetworkTopologyChange(old_state),
|
||||
KindWithContent::NetworkTopologyChange(new_state),
|
||||
) = (old_task.kind, &mut task.kind)
|
||||
{
|
||||
new_state.merge(old_state);
|
||||
// the state possibly just changed, rewrite the details
|
||||
task.details = Some(new_state.to_details());
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
old_task.enqueued_at, task.enqueued_at,
|
||||
"Cannot update a task's enqueued_at time"
|
||||
);
|
||||
if old_task.started_at != task.started_at {
|
||||
assert!(
|
||||
reprocessing || old_task.started_at.is_none(),
|
||||
maybe_reprocessing || old_task.started_at.is_none(),
|
||||
"Cannot update a task's started_at time"
|
||||
);
|
||||
if let Some(started_at) = old_task.started_at {
|
||||
@@ -161,7 +174,7 @@ impl TaskQueue {
|
||||
}
|
||||
if old_task.finished_at != task.finished_at {
|
||||
assert!(
|
||||
reprocessing || old_task.finished_at.is_none(),
|
||||
maybe_reprocessing || old_task.finished_at.is_none(),
|
||||
"Cannot update a task's finished_at time"
|
||||
);
|
||||
if let Some(finished_at) = old_task.finished_at {
|
||||
@@ -175,7 +188,16 @@ impl TaskQueue {
|
||||
task.network = match (old_task.network, task.network.take()) {
|
||||
(None, None) => None,
|
||||
(None, Some(network)) | (Some(network), None) => Some(network),
|
||||
(Some(_), Some(network)) => Some(network),
|
||||
(Some(left), Some(right)) => Some(match (left, right) {
|
||||
(
|
||||
DbTaskNetwork::Remotes { remote_tasks: mut left, network_version: _ },
|
||||
DbTaskNetwork::Remotes { remote_tasks: mut right, network_version },
|
||||
) => {
|
||||
left.append(&mut right);
|
||||
DbTaskNetwork::Remotes { remote_tasks: left, network_version }
|
||||
}
|
||||
(_, right) => right,
|
||||
}),
|
||||
};
|
||||
|
||||
self.all_tasks.put(wtxn, &task.uid, task)?;
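When both the stored task and the update carry `Remotes` network data, the two maps are appended instead of one silently replacing the other, and the newer network version wins. A hedged sketch of that merge on plain `BTreeMap`s (the real `DbTaskNetwork` stores richer per-remote data):

use std::collections::BTreeMap;

// Merge remote-task maps from two updates of the same task; entries from `right`
// win on key conflicts, and the version of the most recent update is kept.
fn merge_remotes(
    mut left: BTreeMap<String, u32>,
    mut right: BTreeMap<String, u32>,
    new_version: u128,
) -> (BTreeMap<String, u32>, u128) {
    left.append(&mut right);
    (left, new_version)
}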
|
||||
|
||||
@@ -203,26 +203,30 @@ fn test_disable_auto_deletion_of_tasks() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
{
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks = index_scheduler
|
||||
.queue
|
||||
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
|
||||
.unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
}
|
||||
|
||||
// now we're above the max number of tasks
|
||||
// and if we try to advance in the tick function no new task deletion should be enqueued
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
{
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks = index_scheduler
|
||||
.queue
|
||||
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
|
||||
.unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -267,59 +271,69 @@ fn test_auto_deletion_of_tasks() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
{
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks = index_scheduler
|
||||
.queue
|
||||
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
|
||||
.unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
}
|
||||
|
||||
// now we're above the max number of tasks
|
||||
// and if we try to advance in the tick function a new task deletion should be enqueued
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
{
|
||||
// now we're above the max number of tasks
|
||||
// and if we try to advance in the tick function a new task deletion should be enqueued
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks = index_scheduler
|
||||
.queue
|
||||
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
|
||||
.unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
|
||||
}
|
||||
|
||||
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
{
|
||||
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks = index_scheduler
|
||||
.queue
|
||||
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
|
||||
.unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
|
||||
}
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
// a new task deletion has been enqueued
|
||||
handle.advance_one_successful_batch();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
{
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks = index_scheduler
|
||||
.queue
|
||||
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
|
||||
.unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
|
||||
}
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
handle.advance_one_successful_batch();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
{
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks = index_scheduler
|
||||
.queue
|
||||
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
|
||||
.unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -74,6 +74,7 @@ impl From<KindWithContent> for AutobatchKind {
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::NetworkTopologyChange(_)
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.")
|
||||
}
|
||||
|
||||
27  crates/index-scheduler/src/scheduler/community_edition.rs (new file)
@@ -0,0 +1,27 @@
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::tasks::Task;

use super::create_batch::Batch;
use crate::scheduler::process_batch::ProcessBatchInfo;
use crate::utils::ProcessingBatch;
use crate::{Error, IndexScheduler, Result};

impl IndexScheduler {
    pub(super) fn process_network_index_batch(
        &self,
        _network_task: Task,
        _inner_batch: Box<Batch>,
        _current_batch: &mut ProcessingBatch,
        _progress: Progress,
    ) -> Result<(Vec<Task>, ProcessBatchInfo)> {
        Err(Error::RequiresEnterpriseEdition { action: "processing a network task" })
    }

    pub(super) fn process_network_ready(
        &self,
        _task: Task,
        _progress: Progress,
    ) -> Result<(Vec<Task>, ProcessBatchInfo)> {
        Err(Error::RequiresEnterpriseEdition { action: "processing a network task" })
    }
}
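These stubs exist so the scheduler still compiles without the enterprise code: the mod.rs change later in this diff selects `community_edition` or `enterprise_edition` with the `enterprise` cargo feature, and both modules expose the same method names. A minimal sketch of that compile-time switch (the function bodies are illustrative only):

#[cfg(not(feature = "enterprise"))]
mod community_edition {
    // Without the feature, network batches are rejected up front.
    pub fn process_network_ready() -> Result<(), &'static str> {
        Err("processing a network task requires the enterprise edition")
    }
}

#[cfg(feature = "enterprise")]
mod enterprise_edition {
    // With the feature, the real implementation runs instead.
    pub fn process_network_ready() -> Result<(), &'static str> {
        Ok(())
    }
}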
|
||||
@@ -4,6 +4,7 @@ use std::io::ErrorKind;
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
use meilisearch_types::tasks::network::NetworkTopologyState;
|
||||
use meilisearch_types::tasks::{BatchStopReason, Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use uuid::Uuid;
|
||||
@@ -59,6 +60,14 @@ pub(crate) enum Batch {
|
||||
index_uid: String,
|
||||
task: Task,
|
||||
},
|
||||
#[allow(clippy::enum_variant_names)] // warranted because we are executing an inner index batch
|
||||
NetworkIndexBatch {
|
||||
network_task: Task,
|
||||
inner_batch: Box<Batch>,
|
||||
},
|
||||
NetworkReady {
|
||||
task: Task,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -140,9 +149,14 @@ impl Batch {
|
||||
..
|
||||
} => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
|
||||
},
|
||||
Batch::IndexSwap { task } => {
|
||||
Batch::IndexSwap { task } | Batch::NetworkReady { task } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
Batch::NetworkIndexBatch { network_task, inner_batch } => {
|
||||
let mut tasks = inner_batch.ids();
|
||||
tasks.insert(network_task.uid);
|
||||
tasks
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -156,12 +170,14 @@ impl Batch {
|
||||
| Dump(_)
|
||||
| Export { .. }
|
||||
| UpgradeDatabase { .. }
|
||||
| NetworkReady { .. }
|
||||
| IndexSwap { .. } => None,
|
||||
IndexOperation { op, .. } => Some(op.index_uid()),
|
||||
IndexCreation { index_uid, .. }
|
||||
| IndexUpdate { index_uid, .. }
|
||||
| IndexDeletion { index_uid, .. }
|
||||
| IndexCompaction { index_uid, .. } => Some(index_uid),
|
||||
NetworkIndexBatch { network_task: _, inner_batch } => inner_batch.index_uid(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -184,6 +200,8 @@ impl fmt::Display for Batch {
|
||||
Batch::IndexCompaction { .. } => f.write_str("IndexCompaction")?,
|
||||
Batch::Export { .. } => f.write_str("Export")?,
|
||||
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
|
||||
Batch::NetworkIndexBatch { .. } => f.write_str("NetworkTopologyChange")?,
|
||||
Batch::NetworkReady { .. } => f.write_str("NetworkTopologyChange")?,
|
||||
};
|
||||
match index_uid {
|
||||
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
|
||||
@@ -452,6 +470,7 @@ impl IndexScheduler {
|
||||
pub(crate) fn create_next_batch(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
processing_network_tasks: &RoaringBitmap,
|
||||
) -> Result<Option<(Batch, ProcessingBatch)>> {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;
|
||||
@@ -460,7 +479,6 @@ impl IndexScheduler {
|
||||
let mut current_batch = ProcessingBatch::new(batch_id);
|
||||
|
||||
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
|
||||
let count_total_enqueued = enqueued.len();
|
||||
let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?;
|
||||
|
||||
// 0. we get the last task to cancel.
|
||||
@@ -509,7 +527,15 @@ impl IndexScheduler {
|
||||
)));
|
||||
}
|
||||
|
||||
// 2. we get the next task to delete
|
||||
// 2. Check for enqueued network topology changes
|
||||
let network_changes = self.queue.tasks.get_kind(rtxn, Kind::NetworkTopologyChange)?
|
||||
& (enqueued | processing_network_tasks);
|
||||
if let Some(task_id) = network_changes.iter().next() {
|
||||
let task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
|
||||
return self.start_processing_network(rtxn, task, enqueued, current_batch);
|
||||
}
|
||||
|
||||
// 3. we get the next task to delete
|
||||
let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
|
||||
if !to_delete.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?;
|
||||
@@ -519,7 +545,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 3. we get the next task to compact
|
||||
// 4. we get the next task to compact
|
||||
let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued;
|
||||
if let Some(task_id) = to_compact.min() {
|
||||
let mut task =
|
||||
@@ -534,7 +560,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the export.
|
||||
// 5. we batch the export.
|
||||
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
|
||||
if !to_export.is_empty() {
|
||||
let task_id = to_export.iter().next().expect("There must be at least one export task");
|
||||
@@ -545,7 +571,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::Export { task }, current_batch)));
|
||||
}
|
||||
|
||||
// 5. we batch the snapshot.
|
||||
// 6. we batch the snapshot.
|
||||
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
|
||||
if !to_snapshot.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
|
||||
@@ -555,7 +581,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 6. we batch the dumps.
|
||||
// 7. we batch the dumps.
|
||||
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
|
||||
if let Some(to_dump) = to_dump.min() {
|
||||
let mut task =
|
||||
@@ -568,25 +594,66 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::Dump(task), current_batch)));
|
||||
}
|
||||
|
||||
// 7. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
let network = self.network();
|
||||
|
||||
// If the task is not associated with any index, verify that it is an index swap and
|
||||
// create the batch directly. Otherwise, get the index name associated with the task
|
||||
// and use the autobatcher to batch the enqueued tasks associated with it
|
||||
// 8. We make a batch from the unprioritised tasks.
|
||||
let (batch, current_batch) =
|
||||
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
|
||||
// We want to execute all tasks, except those that have a version strictly higher than the network version
|
||||
|
||||
let index_name = if let Some(&index_name) = task.indexes().first() {
|
||||
index_name
|
||||
} else {
|
||||
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
|
||||
current_batch.processing(Some(&mut task));
|
||||
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
|
||||
kind: Kind::IndexSwap,
|
||||
id: task.uid,
|
||||
});
|
||||
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
|
||||
let Some(task_version) =
|
||||
task.network.as_ref().map(|task_network| task_network.network_version())
|
||||
else {
|
||||
// do not skip tasks that have no network version, otherwise we will never execute them
|
||||
return false;
|
||||
};
|
||||
|
||||
// skip tasks with a version strictly higher than the network version
|
||||
task_version > network.version
|
||||
})?;
|
||||
Ok(batch.map(|batch| (batch, current_batch)))
|
||||
}
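The closure passed to `create_next_batch_unprioritized` above is the only network-aware filter on the normal path: tasks without a network version always remain eligible, and versioned tasks are deferred while they are ahead of the locally known network version. A standalone sketch of that predicate, assuming a plain integer version for illustration:

// Returns true when the unprioritized batcher should *skip* the task.
fn skip_task(task_version: Option<u128>, network_version: u128) -> bool {
    match task_version {
        // Never skip tasks without a network version, otherwise they would never run.
        None => false,
        // Defer tasks that belong to a newer topology than the one currently in effect.
        Some(version) => version > network_version,
    }
}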
|
||||
|
||||
fn create_next_batch_unprioritized<F>(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
enqueued: &RoaringBitmap,
|
||||
mut current_batch: ProcessingBatch,
|
||||
mut skip_if: F,
|
||||
) -> Result<(Option<Batch>, ProcessingBatch)>
|
||||
where
|
||||
F: FnMut(&Task) -> bool,
|
||||
{
|
||||
let count_total_enqueued = enqueued.len();
|
||||
|
||||
let mut enqueued_it = enqueued.iter();
|
||||
let mut task;
|
||||
let index_name = loop {
|
||||
let Some(task_id) = enqueued_it.next() else {
|
||||
return Ok((None, current_batch));
|
||||
};
|
||||
task = self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
if skip_if(&task) {
|
||||
continue;
|
||||
}
|
||||
// If the task is not associated with any index, verify that it is an index swap and
|
||||
// create the batch directly. Otherwise, get the index name associated with the task
|
||||
// and use the autobatcher to batch the enqueued tasks associated with it
|
||||
|
||||
if let Some(&index_name) = task.indexes().first() {
|
||||
break index_name;
|
||||
} else {
|
||||
assert!(
|
||||
matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())
|
||||
);
|
||||
current_batch.processing(Some(&mut task));
|
||||
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
|
||||
kind: Kind::IndexSwap,
|
||||
id: task.uid,
|
||||
});
|
||||
return Ok((Some(Batch::IndexSwap { task }), current_batch));
|
||||
};
|
||||
};
|
||||
|
||||
let index_already_exists = self.index_mapper.exists(rtxn, index_name)?;
|
||||
@@ -621,6 +688,10 @@ impl IndexScheduler {
|
||||
.get_task(rtxn, task_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
|
||||
|
||||
if skip_if(&task) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(uuid) = task.content_uuid() {
|
||||
let content_size = match self.queue.file_store.compute_size(uuid) {
|
||||
Ok(content_size) => content_size,
|
||||
@@ -651,19 +722,116 @@ impl IndexScheduler {
|
||||
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
|
||||
{
|
||||
current_batch.reason(autobatch_stop_reason.unwrap_or(stop_reason));
|
||||
return Ok(self
|
||||
.create_next_batch_index(
|
||||
rtxn,
|
||||
index_name.to_string(),
|
||||
batchkind,
|
||||
&mut current_batch,
|
||||
create_index,
|
||||
)?
|
||||
.map(|batch| (batch, current_batch)));
|
||||
let batch = self.create_next_batch_index(
|
||||
rtxn,
|
||||
index_name.to_string(),
|
||||
batchkind,
|
||||
&mut current_batch,
|
||||
create_index,
|
||||
)?;
|
||||
return Ok((batch, current_batch));
|
||||
}
|
||||
|
||||
// If we found no tasks then we were notified for something that got autobatched
|
||||
// somehow and there is nothing to do.
|
||||
Ok(None)
|
||||
Ok((None, current_batch))
|
||||
}
|
||||
|
||||
fn start_processing_network(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
mut task: Task,
|
||||
enqueued: &RoaringBitmap,
|
||||
mut current_batch: ProcessingBatch,
|
||||
) -> Result<Option<(Batch, ProcessingBatch)>> {
|
||||
current_batch.processing(Some(&mut task));
|
||||
|
||||
let change_version =
|
||||
task.network.as_ref().map(|network| network.network_version()).unwrap_or_default();
|
||||
let KindWithContent::NetworkTopologyChange(network_topology_change) = &task.kind else {
|
||||
panic!("inconsistent kind with content")
|
||||
};
|
||||
|
||||
match network_topology_change.state() {
|
||||
NetworkTopologyState::WaitingForOlderTasks => {
|
||||
let res =
|
||||
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
|
||||
// in this limited mode of execution, we only want to run tasks:
|
||||
// 0. with an index
|
||||
// 1. with a version
|
||||
// 2. that version strictly lower than the network task version
|
||||
|
||||
// 0. skip indexless tasks that are not index swap
|
||||
if task.index_uid().is_none() && task.kind.as_kind() != Kind::IndexSwap {
|
||||
return true;
|
||||
}
|
||||
|
||||
// 1. skip tasks without version
|
||||
let Some(task_version) =
|
||||
task.network.as_ref().map(|network| network.network_version())
|
||||
else {
|
||||
return true;
|
||||
};
|
||||
|
||||
// 2. skip tasks with a version equal or higher to the network task version
|
||||
task_version >= change_version
|
||||
});
|
||||
|
||||
let (batch, current_batch) = res?;
|
||||
|
||||
let batch = match batch {
|
||||
Some(batch) => {
|
||||
let inner_batch = Box::new(batch);
|
||||
|
||||
Batch::NetworkIndexBatch { network_task: task, inner_batch }
|
||||
}
|
||||
None => Batch::NetworkReady { task },
|
||||
};
|
||||
|
||||
Ok(Some((batch, current_batch)))
|
||||
}
|
||||
NetworkTopologyState::ImportingDocuments => {
|
||||
// if the import is done we need to go to the next state
|
||||
if network_topology_change.is_import_finished() {
|
||||
return Ok(Some((Batch::NetworkReady { task }, current_batch)));
|
||||
}
|
||||
|
||||
let res =
|
||||
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
|
||||
// in this limited mode of execution, we only want to run tasks:
|
||||
// 0. with an index
|
||||
// 1. with a version
|
||||
// 2. that version equal to the network task version
|
||||
|
||||
// 0. skip indexless tasks
|
||||
if task.index_uid().is_none() && task.kind.as_kind() != Kind::IndexSwap {
|
||||
return true;
|
||||
}
|
||||
|
||||
// 1. skip tasks without version
|
||||
let Some(task_version) =
|
||||
task.network.as_ref().map(|network| network.network_version())
|
||||
else {
|
||||
return true;
|
||||
};
|
||||
|
||||
// 2. skip tasks with a version different from the network task version
|
||||
task_version != change_version
|
||||
});
|
||||
|
||||
let (batch, current_batch) = res?;
|
||||
|
||||
let batch = batch.map(|batch| {
|
||||
let inner_batch = Box::new(batch);
|
||||
|
||||
(Batch::NetworkIndexBatch { network_task: task, inner_batch }, current_batch)
|
||||
});
|
||||
|
||||
Ok(batch)
|
||||
}
|
||||
NetworkTopologyState::ExportingDocuments | NetworkTopologyState::Finished => {
|
||||
Ok(Some((Batch::NetworkReady { task }, current_batch)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
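Scheduling under a topology change is therefore a small state machine: while waiting for older tasks, only work from versions strictly below the change is batched; while importing, only work at exactly the change version is batched; and in the exporting and finished states the `NetworkReady` step is scheduled instead. A compressed sketch of that dispatch, assuming the four states shown in this diff and an integer version:

enum NetworkTopologyState { WaitingForOlderTasks, ImportingDocuments, ExportingDocuments, Finished }

// Which task versions may still be batched under the change; `None` means the
// scheduler should emit the NetworkReady batch instead of an index batch.
fn allowed_versions(state: &NetworkTopologyState, change: u128) -> Option<std::ops::Range<u128>> {
    match state {
        NetworkTopologyState::WaitingForOlderTasks => Some(0..change),
        NetworkTopologyState::ImportingDocuments => Some(change..change + 1),
        NetworkTopologyState::ExportingDocuments | NetworkTopologyState::Finished => None,
    }
}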
|
||||
|
||||
301  crates/index-scheduler/src/scheduler/enterprise_edition/mod.rs (new file)
@@ -0,0 +1,301 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::time::Duration;
|
||||
|
||||
use bumpalo::Bump;
|
||||
use meilisearch_types::milli::documents::PrimaryKey;
|
||||
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
|
||||
use meilisearch_types::milli::update::new::indexer;
|
||||
use meilisearch_types::milli::update::new::indexer::current_edition::sharding::Shards;
|
||||
use meilisearch_types::milli::{self};
|
||||
use meilisearch_types::network::Remote;
|
||||
use meilisearch_types::tasks::network::{NetworkTopologyState, Origin};
|
||||
use meilisearch_types::tasks::{KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::create_batch::Batch;
|
||||
use crate::scheduler::process_batch::ProcessBatchInfo;
|
||||
use crate::scheduler::process_export::{ExportContext, ExportOptions, TargetInstance};
|
||||
use crate::utils::ProcessingBatch;
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(super) fn process_network_index_batch(
|
||||
&self,
|
||||
mut network_task: Task,
|
||||
inner_batch: Box<Batch>,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
progress: Progress,
|
||||
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
|
||||
let (mut tasks, info) = self.process_batch(*inner_batch, current_batch, progress)?;
|
||||
let KindWithContent::NetworkTopologyChange(network_topology_change) =
|
||||
&mut network_task.kind
|
||||
else {
|
||||
tracing::error!("unexpected network kind for network task while processing batch");
|
||||
return Err(Error::CorruptedTaskQueue);
|
||||
};
|
||||
for task in &tasks {
|
||||
let Some(network) = task.network.as_ref() else {
|
||||
continue;
|
||||
};
|
||||
let Some(import) = network.import_data() else {
|
||||
continue;
|
||||
};
|
||||
if let Some(index_name) = import.index_name.as_deref() {
|
||||
network_topology_change.process_remote_tasks(
|
||||
&import.remote_name,
|
||||
index_name,
|
||||
import.document_count,
|
||||
);
|
||||
}
|
||||
}
|
||||
network_task.details = Some(network_topology_change.to_details());
|
||||
|
||||
tasks.push(network_task);
|
||||
Ok((tasks, info))
|
||||
}
|
||||
|
||||
pub(super) fn process_network_ready(
|
||||
&self,
|
||||
mut task: Task,
|
||||
progress: Progress,
|
||||
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
|
||||
let KindWithContent::NetworkTopologyChange(network_topology_change) = &mut task.kind else {
|
||||
tracing::error!("network topology change task has the wrong kind with content");
|
||||
return Err(Error::CorruptedTaskQueue);
|
||||
};
|
||||
|
||||
let Some(task_network) = &task.network else {
|
||||
tracing::error!("network topology change task has no network");
|
||||
return Err(Error::CorruptedTaskQueue);
|
||||
};
|
||||
|
||||
let origin;
|
||||
let origin = match task_network.origin() {
|
||||
Some(origin) => origin,
|
||||
None => {
|
||||
let myself = network_topology_change.in_name().expect("origin is not the leader");
|
||||
origin = Origin {
|
||||
remote_name: myself.to_string(),
|
||||
task_uid: task.uid,
|
||||
network_version: task_network.network_version(),
|
||||
};
|
||||
&origin
|
||||
}
|
||||
};
|
||||
|
||||
if let Some((remotes, out_name)) = network_topology_change.export_to_process() {
|
||||
let moved_documents = self.balance_documents(
|
||||
remotes,
|
||||
out_name,
|
||||
network_topology_change.in_name(),
|
||||
origin,
|
||||
&progress,
|
||||
&self.scheduler.must_stop_processing,
|
||||
)?;
|
||||
network_topology_change.set_moved(moved_documents);
|
||||
}
|
||||
network_topology_change.update_state();
|
||||
if network_topology_change.state() == NetworkTopologyState::Finished {
|
||||
task.status = Status::Succeeded;
|
||||
}
|
||||
|
||||
task.details = Some(network_topology_change.to_details());
|
||||
Ok((vec![task], Default::default()))
|
||||
}
|
||||
|
||||
fn balance_documents(
|
||||
&self,
|
||||
remotes: &BTreeMap<String, Remote>,
|
||||
out_name: &str,
|
||||
in_name: Option<&str>,
|
||||
network_change_origin: &Origin,
|
||||
progress: &Progress,
|
||||
must_stop_processing: &crate::scheduler::MustStopProcessing,
|
||||
) -> crate::Result<u64> {
|
||||
let new_shards =
|
||||
Shards::from_remotes_local(remotes.keys().map(String::as_str).chain(in_name), in_name);
|
||||
|
||||
// TECHDEBT: this spawns a `ureq` agent additionally to `reqwest`. We probably want to harmonize all of this.
|
||||
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
|
||||
|
||||
let mut indexer_alloc = Bump::new();
|
||||
|
||||
let scheduler_rtxn = self.env.read_txn()?;
|
||||
|
||||
let index_count = self.index_mapper.index_count(&scheduler_rtxn)?;
|
||||
|
||||
// when the instance is empty, we still need to tell the remotes, as they cannot know of that fact
// and would otherwise keep waiting for data
|
||||
if index_count == 0 {
|
||||
for (remote_name, remote) in remotes {
|
||||
let target = TargetInstance {
|
||||
remote_name: Some(remote_name),
|
||||
base_url: &remote.url,
|
||||
api_key: remote.write_api_key.as_deref(),
|
||||
};
|
||||
|
||||
let res = self.export_no_index(
|
||||
target,
|
||||
out_name,
|
||||
network_change_origin,
|
||||
&agent,
|
||||
must_stop_processing,
|
||||
);
|
||||
|
||||
if let Err(err) = res {
|
||||
tracing::warn!("Could not signal not to wait documents to `{remote_name}` due to error: {err}");
|
||||
}
|
||||
}
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let mut total_moved_documents = 0;
|
||||
|
||||
self.index_mapper.try_for_each_index::<(), ()>(
|
||||
&scheduler_rtxn,
|
||||
|index_uid, index| -> crate::Result<()> {
|
||||
indexer_alloc.reset();
|
||||
let err = |err| Error::from_milli(err, Some(index_uid.to_string()));
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let all_docids = index.external_documents_ids();
|
||||
let mut documents_to_move_to =
|
||||
hashbrown::HashMap::<String, RoaringBitmap>::new();
|
||||
let mut documents_to_delete = RoaringBitmap::new();
|
||||
|
||||
for res in all_docids.iter(&index_rtxn)? {
|
||||
let (external_docid, docid) = res?;
|
||||
match new_shards.processing_shard(external_docid) {
|
||||
Some(shard) if shard.is_own => continue,
|
||||
Some(shard) => {
|
||||
documents_to_move_to.entry_ref(&shard.name).or_default().insert(docid);
|
||||
}
|
||||
None => {
|
||||
documents_to_delete.insert(docid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
|
||||
|
||||
for (remote_name, remote) in remotes {
|
||||
let documents_to_move =
|
||||
documents_to_move_to.remove(remote_name).unwrap_or_default();
|
||||
|
||||
let target = TargetInstance {
|
||||
remote_name: Some(remote_name),
|
||||
base_url: &remote.url,
|
||||
api_key: remote.write_api_key.as_deref(),
|
||||
};
|
||||
let options = ExportOptions {
|
||||
index_uid,
|
||||
payload_size: None,
|
||||
override_settings: false,
|
||||
export_mode: super::process_export::ExportMode::NetworkBalancing {
|
||||
index_count,
|
||||
export_old_remote_name: out_name,
|
||||
network_change_origin,
|
||||
},
|
||||
};
|
||||
let ctx = ExportContext {
|
||||
index,
|
||||
index_rtxn: &index_rtxn,
|
||||
universe: &documents_to_move,
|
||||
progress,
|
||||
agent: &agent,
|
||||
must_stop_processing,
|
||||
};
|
||||
|
||||
let res = self.export_one_index(target, options, ctx);
|
||||
|
||||
match res {
|
||||
Ok(_) => { documents_to_delete |= documents_to_move; }
|
||||
Err(err) => {
|
||||
tracing::warn!("Could not export documents to `{remote_name}` due to error: {err}\n - Note: Documents will be kept");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
if documents_to_delete.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
total_moved_documents += documents_to_delete.len();
|
||||
|
||||
self.delete_documents_from_index(progress, must_stop_processing, &indexer_alloc, index_uid, index, &err, index_rtxn, documents_to_delete, fields_ids_map)
|
||||
},
|
||||
)?;
|
||||
|
||||
Ok(total_moved_documents)
|
||||
}
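For each index, `balance_documents` splits the external document ids three ways: ids whose new shard is the local instance are kept, ids owned by another remote are queued for export to that remote, and ids with no owner are deleted. A small sketch of that partitioning step; `shard_of` is a hypothetical stand-in for `Shards::processing_shard`:

use std::collections::HashMap;
use roaring::RoaringBitmap;

enum Shard { Own, Remote(String), None }

fn partition(
    docs: impl Iterator<Item = (String, u32)>,
    shard_of: impl Fn(&str) -> Shard,
) -> (HashMap<String, RoaringBitmap>, RoaringBitmap) {
    let mut to_move: HashMap<String, RoaringBitmap> = HashMap::new();
    let mut to_delete = RoaringBitmap::new();
    for (external_id, docid) in docs {
        match shard_of(&external_id) {
            Shard::Own => continue,                      // stays on this instance
            Shard::Remote(name) => { to_move.entry(name).or_default().insert(docid); }
            Shard::None => { to_delete.insert(docid); }  // no owner after the change
        }
    }
    (to_move, to_delete)
}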
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn delete_documents_from_index(
|
||||
&self,
|
||||
progress: &Progress,
|
||||
must_stop_processing: &super::MustStopProcessing,
|
||||
indexer_alloc: &Bump,
|
||||
index_uid: &str,
|
||||
index: &milli::Index,
|
||||
err: &impl Fn(milli::Error) -> Error,
|
||||
index_rtxn: milli::heed::RoTxn<'_, milli::heed::WithoutTls>,
|
||||
documents_to_delete: RoaringBitmap,
|
||||
fields_ids_map: milli::FieldsIdsMap,
|
||||
) -> std::result::Result<(), Error> {
|
||||
let mut new_fields_ids_map = fields_ids_map.clone();
|
||||
|
||||
// candidates not empty => index not empty => a primary key is set
|
||||
let primary_key = index.primary_key(&index_rtxn)?.unwrap();
|
||||
|
||||
let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
|
||||
.map_err(milli::Error::from)
|
||||
.map_err(err)?;
|
||||
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
|
||||
let mut indexer = indexer::DocumentDeletion::new();
|
||||
indexer.delete_documents_by_docids(documents_to_delete);
|
||||
let document_changes = indexer.into_changes(indexer_alloc, primary_key);
|
||||
let embedders = index
|
||||
.embedding_configs()
|
||||
.embedding_configs(&index_wtxn)
|
||||
.map_err(milli::Error::from)
|
||||
.map_err(err)?;
|
||||
let embedders = self.embedders(index_uid.to_string(), embedders)?;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = &indexer_config.thread_pool;
|
||||
|
||||
indexer::index(
|
||||
&mut index_wtxn,
|
||||
index,
|
||||
pool,
|
||||
indexer_config.grenad_parameters(),
|
||||
&fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
None, // document deletion never changes primary key
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
&EmbedderStats::default(),
|
||||
)
|
||||
.map_err(err)?;
|
||||
|
||||
// update stats
|
||||
let mut mapper_wtxn = self.env.write_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(index, &index_wtxn).map_err(err)?;
|
||||
self.index_mapper.store_stats_of(&mut mapper_wtxn, index_uid, &stats)?;
|
||||
|
||||
index_wtxn.commit()?;
|
||||
// update stats after committing changes to index
|
||||
mapper_wtxn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,12 @@
|
||||
mod autobatcher;
|
||||
#[cfg(test)]
|
||||
mod autobatcher_test;
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
mod community_edition;
|
||||
mod create_batch;
|
||||
#[cfg(feature = "enterprise")]
|
||||
mod enterprise_edition;
|
||||
|
||||
mod process_batch;
|
||||
mod process_dump_creation;
|
||||
mod process_export;
|
||||
@@ -21,7 +26,6 @@ use std::path::PathBuf;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use convert_case::{Case, Casing as _};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::heed::{Env, WithoutTls};
|
||||
use meilisearch_types::milli;
|
||||
@@ -133,6 +137,7 @@ impl Scheduler {
|
||||
max_number_of_tasks: _,
|
||||
max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit,
|
||||
export_default_payload_size_bytes: _,
|
||||
instance_features: _,
|
||||
auto_upgrade: _,
|
||||
embedding_cache_cap,
|
||||
@@ -178,6 +183,8 @@ impl IndexScheduler {
|
||||
self.breakpoint(crate::test_utils::Breakpoint::Start);
|
||||
}
|
||||
|
||||
let previous_processing_batch = self.processing_tasks.write().unwrap().stop_processing();
|
||||
|
||||
if self.cleanup_enabled {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.queue.cleanup_task_queue(&mut wtxn)?;
|
||||
@@ -185,11 +192,16 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
let (batch, mut processing_batch) =
|
||||
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
|
||||
Some(batch) => batch,
|
||||
None => return Ok(TickOutcome::WaitForSignal),
|
||||
};
|
||||
let (batch, mut processing_batch) = match self
|
||||
.create_next_batch(&rtxn, &previous_processing_batch.processing)
|
||||
.map_err(|e| Error::CreateBatch(Box::new(e)))?
|
||||
{
|
||||
Some(batch) => batch,
|
||||
None => {
|
||||
*self.processing_tasks.write().unwrap() = previous_processing_batch;
|
||||
return Ok(TickOutcome::WaitForSignal);
|
||||
}
|
||||
};
|
||||
let index_uid = batch.index_uid().map(ToOwned::to_owned);
|
||||
drop(rtxn);
|
||||
|
||||
@@ -260,7 +272,14 @@ impl IndexScheduler {
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?;
|
||||
|
||||
progress.update_progress(BatchProgress::WritingTasksToDisk);
|
||||
|
||||
processing_batch.finished();
|
||||
// whether the batch made progress.
|
||||
// a batch make progress if it failed or if it contains at least one fully processed (or cancelled) task.
|
||||
//
|
||||
// if a batch did not make progress, it means that all of its tasks are waiting on the scheduler to make progress,
|
||||
// and so we must wait for new tasks. Such a batch is not persisted to DB, and is resumed on the next tick.
|
||||
let mut batch_made_progress = false;
|
||||
let mut stop_scheduler_forever = false;
|
||||
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
||||
let mut canceled = RoaringBitmap::new();
|
||||
@@ -281,7 +300,11 @@ impl IndexScheduler {
|
||||
#[allow(unused_variables)]
|
||||
for (i, mut task) in tasks.into_iter().enumerate() {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
processing_batch.update(&mut task);
|
||||
processing_batch.update_from_task(&task);
|
||||
if !matches!(task.status, Status::Processing | Status::Enqueued) {
|
||||
batch_made_progress = true;
|
||||
processing_batch.finish_task(&mut task);
|
||||
}
|
||||
if task.status == Status::Canceled {
|
||||
canceled.insert(task.uid);
|
||||
canceled_by = task.canceled_by;
|
||||
@@ -348,6 +371,9 @@ impl IndexScheduler {
|
||||
}
|
||||
// In case of a failure we must get back and patch all the tasks with the error.
|
||||
Err(err) => {
|
||||
// always persist failed batches
|
||||
batch_made_progress = true;
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed);
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
|
||||
@@ -371,7 +397,10 @@ impl IndexScheduler {
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(error.clone());
|
||||
task.details = task.details.map(|d| d.to_failed());
|
||||
processing_batch.update(&mut task);
|
||||
processing_batch.update_from_task(&task);
|
||||
if !matches!(task.status, Status::Processing | Status::Enqueued) {
|
||||
processing_batch.finish_task(&mut task);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(
|
||||
@@ -394,44 +423,12 @@ impl IndexScheduler {
|
||||
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
|
||||
process_batch_info;
|
||||
|
||||
processing_batch.stats.progress_trace =
|
||||
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
|
||||
processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
|
||||
let mut congestion_info = serde_json::Map::new();
|
||||
congestion_info.insert("attempts".into(), congestion.attempts.into());
|
||||
congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
|
||||
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
|
||||
congestion_info
|
||||
});
|
||||
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
|
||||
.iter()
|
||||
.flat_map(|(dbname, pre_size)| {
|
||||
post_commit_dabases_sizes
|
||||
.get(dbname)
|
||||
.map(|post_size| {
|
||||
use std::cmp::Ordering::{Equal, Greater, Less};
|
||||
|
||||
use byte_unit::Byte;
|
||||
use byte_unit::UnitType::Binary;
|
||||
|
||||
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
|
||||
let diff_size = post_size.abs_diff(*pre_size) as u64;
|
||||
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
|
||||
let sign = match post_size.cmp(pre_size) {
|
||||
Equal => return None,
|
||||
Greater => "+",
|
||||
Less => "-",
|
||||
};
|
||||
|
||||
Some((
|
||||
dbname.to_case(Case::Camel),
|
||||
format!("{post:#.2} ({sign}{diff:#.2})").into(),
|
||||
))
|
||||
})
|
||||
.into_iter()
|
||||
.flatten()
|
||||
})
|
||||
.collect();
|
||||
processing_batch.write_stats(
|
||||
&progress,
|
||||
congestion,
|
||||
pre_commit_dabases_sizes,
|
||||
post_commit_dabases_sizes,
|
||||
);
|
||||
|
||||
if let Some(congestion) = congestion {
|
||||
tracing::debug!(
|
||||
@@ -444,46 +441,49 @@ impl IndexScheduler {
|
||||
|
||||
tracing::debug!("call trace: {:?}", progress.accumulated_durations());
|
||||
|
||||
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
|
||||
if batch_made_progress {
|
||||
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?;
|
||||
|
||||
wtxn.commit().map_err(Error::HeedTransaction)?;
|
||||
|
||||
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
|
||||
// and then become « not found » for some time until the commit everything is written and the final commit is made.
|
||||
self.processing_tasks.write().unwrap().stop_processing();
|
||||
if batch_made_progress {
|
||||
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
|
||||
// and then appear as « not found » for some time, until everything has been written and the final commit is made.
|
||||
self.processing_tasks.write().unwrap().stop_processing();
|
||||
|
||||
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
|
||||
tracing::debug!("Deleting the update files");
|
||||
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
|
||||
tracing::debug!("Deleting the update files");
|
||||
|
||||
//We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap
|
||||
let idx = AtomicU32::new(0);
|
||||
(0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> {
|
||||
let rtxn = self.read_txn()?;
|
||||
while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) {
|
||||
let task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&rtxn, id)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
|
||||
tracing::error!(
|
||||
//We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap
|
||||
let idx = AtomicU32::new(0);
|
||||
(0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> {
|
||||
let rtxn = self.read_txn()?;
|
||||
while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) {
|
||||
let task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&rtxn, id)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
|
||||
tracing::error!(
|
||||
"Failure to delete the content files associated with task {}. Error: {e}",
|
||||
task.uid
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})?;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
self.notify_webhooks(ids);
|
||||
self.notify_webhooks(ids);
|
||||
}
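A batch is now only persisted, and the processing set only cleared, when it actually made progress; a batch whose tasks are all still waiting on the scheduler is kept in memory and resumed on the next tick. A sketch of that gate, with the status check reduced to strings for brevity:

// A batch made progress if it failed, or if at least one task reached a terminal status.
fn batch_made_progress(batch_failed: bool, statuses: &[&str]) -> bool {
    batch_failed || statuses.iter().any(|s| matches!(*s, "succeeded" | "failed" | "canceled"))
}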
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);
|
||||
|
||||
if stop_scheduler_forever {
|
||||
Ok(TickOutcome::StopProcessingForever)
|
||||
} else {
|
||||
|
||||
@@ -539,6 +539,10 @@ impl IndexScheduler {
|
||||
|
||||
Ok((tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::NetworkIndexBatch { network_task, inner_batch } => {
|
||||
self.process_network_index_batch(network_task, inner_batch, current_batch, progress)
|
||||
}
|
||||
Batch::NetworkReady { task } => self.process_network_ready(task, progress),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::{self, Write as _};
|
||||
use std::ops::ControlFlow;
|
||||
use std::sync::atomic;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -7,6 +8,7 @@ use backoff::ExponentialBackoff;
|
||||
use byte_unit::Byte;
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::error::Code;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
|
||||
@@ -15,7 +17,10 @@ use meilisearch_types::milli::update::{request_threads, Setting};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
|
||||
use meilisearch_types::settings::{self, SecretPolicy};
|
||||
use meilisearch_types::tasks::network::headers::SetHeader as _;
|
||||
use meilisearch_types::tasks::network::{headers, ImportData, ImportMetadata, Origin};
|
||||
use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::Deserialize;
|
||||
use ureq::{json, Response};
|
||||
|
||||
@@ -50,6 +55,7 @@ impl IndexScheduler {
|
||||
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
|
||||
let err = |err| Error::from_milli(err, Some(uid.to_string()));
|
||||
if must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
@@ -61,261 +67,474 @@ impl IndexScheduler {
|
||||
));
|
||||
|
||||
let ExportIndexSettings { filter, override_settings } = export_settings;
|
||||
|
||||
let index = self.index(uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
|
||||
|
||||
// First, check if the index already exists
|
||||
let url = format!("{base_url}/indexes/{uid}");
|
||||
let response = retry(&must_stop_processing, || {
|
||||
let mut request = agent.get(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
|
||||
request.send_bytes(Default::default()).map_err(into_backoff_error)
|
||||
});
|
||||
let index_exists = match response {
|
||||
Ok(response) => response.status() == 200,
|
||||
Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
|
||||
false
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
|
||||
let primary_key = index
|
||||
.primary_key(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
// Create the index
|
||||
if !index_exists {
|
||||
let url = format!("{base_url}/indexes");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
|
||||
request.send_json(&index_param).map_err(into_backoff_error)
|
||||
})?;
|
||||
}
|
||||
|
||||
// Patch the index primary key
|
||||
if index_exists && *override_settings {
|
||||
let url = format!("{base_url}/indexes/{uid}");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.patch(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
let index_param = json!({ "primaryKey": primary_key });
|
||||
request.send_json(&index_param).map_err(into_backoff_error)
|
||||
})?;
|
||||
}
|
||||
|
||||
// Send the index settings
|
||||
if !index_exists || *override_settings {
|
||||
let mut settings =
|
||||
settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// Remove the experimental chat setting if not enabled
|
||||
if self.features().check_chat_completions("exporting chat settings").is_err() {
|
||||
settings.chat = Setting::NotSet;
|
||||
}
|
||||
// Retry logic for sending settings
|
||||
let url = format!("{base_url}/indexes/{uid}/settings");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.patch(&url);
|
||||
if let Some(bearer) = bearer.as_ref() {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request.send_json(settings.clone()).map_err(into_backoff_error)
|
||||
})?;
|
||||
}
|
||||
|
||||
let filter = filter
|
||||
.as_ref()
|
||||
.map(Filter::from_json)
|
||||
.transpose()
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
|
||||
.flatten();
|
||||
|
||||
let filter_universe = filter
|
||||
.map(|f| f.evaluate(&index_rtxn, &index))
|
||||
.transpose()
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
let whole_universe = index
|
||||
.documents_ids(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
let filter = filter.as_ref().map(Filter::from_json).transpose().map_err(err)?.flatten();
|
||||
let filter_universe =
|
||||
filter.map(|f| f.evaluate(&index_rtxn, &index)).transpose().map_err(err)?;
|
||||
let whole_universe =
|
||||
index.documents_ids(&index_rtxn).map_err(milli::Error::from).map_err(err)?;
|
||||
let universe = filter_universe.unwrap_or(whole_universe);
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
// We don't need to keep this one alive as we will
|
||||
// spawn many threads to process the documents
|
||||
drop(index_rtxn);
|
||||
|
||||
let total_documents = universe.len() as u32;
|
||||
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
|
||||
progress.update_progress(progress_step);
|
||||
let target = TargetInstance { remote_name: None, base_url, api_key };
|
||||
let ctx = ExportContext {
|
||||
index: &index,
|
||||
index_rtxn: &index_rtxn,
|
||||
universe: &universe,
|
||||
progress: &progress,
|
||||
agent: &agent,
|
||||
must_stop_processing: &must_stop_processing,
|
||||
};
|
||||
let options = ExportOptions {
|
||||
index_uid: uid,
|
||||
payload_size,
|
||||
override_settings: *override_settings,
|
||||
export_mode: ExportMode::ExportRoute,
|
||||
};
|
||||
let total_documents = self.export_one_index(target, options, ctx)?;
|
||||
|
||||
output.insert(
|
||||
IndexUidPattern::new_unchecked(uid.clone()),
|
||||
DetailsExportIndexSettings {
|
||||
settings: (*export_settings).clone(),
|
||||
matched_documents: Some(total_documents as u64),
|
||||
matched_documents: Some(total_documents),
|
||||
},
|
||||
);
|
||||
|
||||
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
|
||||
let documents_url = format!("{base_url}/indexes/{uid}/documents");
|
||||
|
||||
let results = request_threads()
|
||||
.broadcast(|ctx| {
|
||||
let index_rtxn = index
|
||||
.read_txn()
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let mut tmp_buffer = Vec::new();
|
||||
let mut compressed_buffer = Vec::new();
|
||||
for (i, docid) in universe.iter().enumerate() {
|
||||
if i % ctx.num_threads() != ctx.index() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let document = index
|
||||
.document(&index_rtxn, docid)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
// TODO definitely factorize this code
|
||||
'inject_vectors: {
|
||||
let embeddings = index
|
||||
.embeddings(&index_rtxn, docid)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME)
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
return Err(Error::from_milli(
|
||||
milli::Error::UserError(
|
||||
milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
.external_id_of(
|
||||
&index_rtxn,
|
||||
std::iter::once(docid),
|
||||
)
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={docid}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
},
|
||||
),
|
||||
Some(uid.to_string()),
|
||||
));
|
||||
};
|
||||
|
||||
for (
|
||||
embedder_name,
|
||||
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||
) in embeddings
|
||||
{
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(
|
||||
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
|
||||
),
|
||||
regenerate: regenerate &&
|
||||
// Meilisearch does not handle dumps with fragments well: because the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark such embeddings as non-regenerate in this case.
|
||||
!has_fragments,
|
||||
};
|
||||
vectors.insert(
|
||||
embedder_name,
|
||||
serde_json::to_value(embeddings).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
tmp_buffer.clear();
|
||||
serde_json::to_writer(&mut tmp_buffer, &document)
|
||||
.map_err(milli::InternalError::from)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
// Make sure we put at least one document in the buffer even
|
||||
// though we might go above the buffer limit before sending
|
||||
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
|
||||
// We compress the documents before sending them
|
||||
let mut encoder =
|
||||
GzEncoder::new(&mut compressed_buffer, Compression::default());
|
||||
encoder
|
||||
.write_all(&buffer)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
|
||||
encoder
|
||||
.finish()
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
|
||||
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&documents_url);
|
||||
request = request.set("Content-Type", "application/x-ndjson");
|
||||
request = request.set("Content-Encoding", "gzip");
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
|
||||
})?;
|
||||
buffer.clear();
|
||||
compressed_buffer.clear();
|
||||
}
|
||||
buffer.extend_from_slice(&tmp_buffer);
|
||||
|
||||
if i > 0 && i % 100 == 0 {
|
||||
step.fetch_add(100, atomic::Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&documents_url);
|
||||
request = request.set("Content-Type", "application/x-ndjson");
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request.send_bytes(&buffer).map_err(into_backoff_error)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|e| {
|
||||
Error::from_milli(
|
||||
milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
|
||||
Some(uid.to_string()),
|
||||
)
|
||||
})?;
|
||||
for result in results {
|
||||
result?;
|
||||
}
|
||||
|
||||
step.store(total_documents, atomic::Ordering::Relaxed);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
pub(super) fn export_one_index(
|
||||
&self,
|
||||
target: TargetInstance<'_>,
|
||||
options: ExportOptions<'_>,
|
||||
ctx: ExportContext<'_>,
|
||||
) -> Result<u64, Error> {
|
||||
let err = |err| Error::from_milli(err, Some(options.index_uid.to_string()));
|
||||
let total_index_documents = ctx.universe.len();
|
||||
let task_network = options.task_network(total_index_documents);
|
||||
|
||||
let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}"));
|
||||
let url = format!(
|
||||
"{base_url}/indexes/{index_uid}",
|
||||
base_url = target.base_url,
|
||||
index_uid = options.index_uid
|
||||
);
|
||||
let response = retry(ctx.must_stop_processing, || {
|
||||
let mut request = ctx.agent.get(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
|
||||
request.send_bytes(Default::default()).map_err(into_backoff_error)
|
||||
});
|
||||
let index_exists = match response {
|
||||
Ok(response) => response.status() == 200,
|
||||
Err(Error::FromRemoteWhenExporting { code, .. })
|
||||
if code == Code::IndexNotFound.name() =>
|
||||
{
|
||||
false
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
let primary_key =
|
||||
ctx.index.primary_key(ctx.index_rtxn).map_err(milli::Error::from).map_err(err)?;
|
||||
if !index_exists {
|
||||
let url = format!("{base_url}/indexes", base_url = target.base_url);
|
||||
let _ = handle_response(
|
||||
target.remote_name,
|
||||
retry(ctx.must_stop_processing, || {
|
||||
let mut request = ctx.agent.post(&url);
|
||||
|
||||
if let Some((import_data, origin, metadata)) = &task_network {
|
||||
request = set_network_ureq_headers(request, import_data, origin, metadata);
|
||||
}
|
||||
|
||||
if let Some(bearer) = bearer.as_ref() {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
let index_param =
|
||||
json!({ "uid": options.index_uid, "primaryKey": primary_key });
|
||||
|
||||
request.send_json(&index_param).map_err(into_backoff_error)
|
||||
}),
|
||||
)?;
|
||||
}
|
||||
if index_exists && options.override_settings {
|
||||
let _ = handle_response(
|
||||
target.remote_name,
|
||||
retry(ctx.must_stop_processing, || {
|
||||
let mut request = ctx.agent.patch(&url);
|
||||
if let Some((import_data, origin, metadata)) = &task_network {
|
||||
request = set_network_ureq_headers(request, import_data, origin, metadata);
|
||||
}
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
let index_param = json!({ "primaryKey": primary_key });
|
||||
request.send_json(&index_param).map_err(into_backoff_error)
|
||||
}),
|
||||
)?;
|
||||
}
|
||||
if !index_exists || options.override_settings {
|
||||
let mut settings =
|
||||
settings::settings(ctx.index, ctx.index_rtxn, SecretPolicy::RevealSecrets)
|
||||
.map_err(err)?;
|
||||
// Remove the experimental chat setting if not enabled
|
||||
if self.features().check_chat_completions("exporting chat settings").is_err() {
|
||||
settings.chat = Setting::NotSet;
|
||||
}
|
||||
// Retry logic for sending settings
|
||||
let url = format!(
|
||||
"{base_url}/indexes/{index_uid}/settings",
|
||||
base_url = target.base_url,
|
||||
index_uid = options.index_uid
|
||||
);
|
||||
|
||||
let _ = handle_response(
|
||||
target.remote_name,
|
||||
retry(ctx.must_stop_processing, || {
|
||||
let mut request = ctx.agent.patch(&url);
|
||||
|
||||
if let Some((import_data, origin, metadata)) = &task_network {
|
||||
request = set_network_ureq_headers(request, import_data, origin, metadata);
|
||||
}
|
||||
|
||||
if let Some(bearer) = bearer.as_ref() {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request.send_json(settings.clone()).map_err(into_backoff_error)
|
||||
}),
|
||||
)?;
|
||||
}
|
||||
|
||||
let fields_ids_map = ctx.index.fields_ids_map(ctx.index_rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let total_documents = ctx.universe.len() as u32;
|
||||
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
|
||||
ctx.progress.update_progress(progress_step);
|
||||
|
||||
let limit = options
|
||||
.payload_size
|
||||
.map(|ps| ps.as_u64() as usize)
|
||||
.unwrap_or(self.export_default_payload_size_bytes.as_u64() as usize);
|
||||
let documents_url = format!(
|
||||
"{base_url}/indexes/{index_uid}/documents",
|
||||
base_url = target.base_url,
|
||||
index_uid = options.index_uid
|
||||
);
|
||||
|
||||
// no document to send, but we must still send a task when performing network balancing
|
||||
if ctx.universe.is_empty() {
|
||||
if let Some((import_data, network_change_origin, metadata)) = task_network {
|
||||
let mut compressed_buffer = Vec::new();
|
||||
// ignore control flow, we're returning anyway
|
||||
let _ = send_buffer(
|
||||
b" ", // needs something otherwise meili complains about missing payload
|
||||
&mut compressed_buffer,
|
||||
ctx.must_stop_processing,
|
||||
ctx.agent,
|
||||
&documents_url,
|
||||
target.remote_name,
|
||||
bearer.as_deref(),
|
||||
Some(&(import_data, network_change_origin.clone(), metadata)),
|
||||
&err,
|
||||
)?;
|
||||
}
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let results = request_threads()
|
||||
.broadcast(|broadcast| {
|
||||
let mut task_network = options.task_network(total_index_documents);
|
||||
|
||||
let index_rtxn = ctx.index.read_txn().map_err(milli::Error::from).map_err(err)?;
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let mut tmp_buffer = Vec::new();
|
||||
let mut compressed_buffer = Vec::new();
|
||||
for (i, docid) in ctx.universe.iter().enumerate() {
|
||||
if i % broadcast.num_threads() != broadcast.index() {
|
||||
continue;
|
||||
}
|
||||
if let Some((import_data, _, metadata)) = &mut task_network {
|
||||
import_data.document_count += 1;
|
||||
metadata.task_key = Some(docid);
|
||||
}
|
||||
|
||||
let document = ctx.index.document(&index_rtxn, docid).map_err(err)?;
|
||||
|
||||
let mut document =
|
||||
obkv_to_json(&all_fields, &fields_ids_map, document).map_err(err)?;
|
||||
|
||||
// TODO definitely factorize this code
|
||||
'inject_vectors: {
|
||||
let embeddings = ctx.index.embeddings(&index_rtxn, docid).map_err(err)?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME)
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
return Err(err(milli::Error::UserError(
|
||||
milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = ctx
|
||||
.index
|
||||
.external_id_of(&index_rtxn, std::iter::once(docid))
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={docid}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
},
|
||||
)));
|
||||
};
|
||||
|
||||
for (
|
||||
embedder_name,
|
||||
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||
) in embeddings
|
||||
{
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
)),
|
||||
regenerate: regenerate &&
|
||||
// Meilisearch does not handle dumps with fragments well: because the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark such embeddings as non-regenerate in this case.
|
||||
!has_fragments,
|
||||
};
|
||||
vectors
|
||||
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
tmp_buffer.clear();
|
||||
serde_json::to_writer(&mut tmp_buffer, &document)
|
||||
.map_err(milli::InternalError::from)
|
||||
.map_err(milli::Error::from)
|
||||
.map_err(err)?;
|
||||
|
||||
// Make sure we put at least one document in the buffer even
|
||||
// though we might go above the buffer limit before sending
|
||||
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
|
||||
let control_flow = send_buffer(
|
||||
&buffer,
|
||||
&mut compressed_buffer,
|
||||
ctx.must_stop_processing,
|
||||
ctx.agent,
|
||||
&documents_url,
|
||||
target.remote_name,
|
||||
bearer.as_deref(),
|
||||
task_network.as_ref(),
|
||||
&err,
|
||||
)?;
|
||||
buffer.clear();
|
||||
compressed_buffer.clear();
|
||||
if let Some((import_data, _, metadata)) = &mut task_network {
|
||||
import_data.document_count = 0;
|
||||
metadata.task_key = None;
|
||||
}
|
||||
if control_flow.is_break() {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
buffer.extend_from_slice(&tmp_buffer);
|
||||
|
||||
if i > 0 && i % 100 == 0 {
|
||||
step.fetch_add(100, atomic::Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
// send the last buffered documents if any
|
||||
if !buffer.is_empty() {
|
||||
// ignore control flow here
|
||||
let _ = send_buffer(
|
||||
&buffer,
|
||||
&mut compressed_buffer,
|
||||
ctx.must_stop_processing,
|
||||
ctx.agent,
|
||||
&documents_url,
|
||||
target.remote_name,
|
||||
bearer.as_deref(),
|
||||
task_network.as_ref(),
|
||||
&err,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|e| err(milli::Error::InternalError(InternalError::PanicInThreadPool(e))))?;
|
||||
for result in results {
|
||||
result?;
|
||||
}
|
||||
step.store(total_documents, atomic::Ordering::Relaxed);
|
||||
Ok(total_documents as u64)
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")] // only used in enterprise edition for now
|
||||
pub(super) fn export_no_index(
|
||||
&self,
|
||||
target: TargetInstance<'_>,
|
||||
export_old_remote_name: &str,
|
||||
network_change_origin: &Origin,
|
||||
agent: &ureq::Agent,
|
||||
must_stop_processing: &MustStopProcessing,
|
||||
) -> Result<(), Error> {
|
||||
let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}"));
|
||||
let url = format!("{base_url}/network", base_url = target.base_url,);
|
||||
|
||||
{
|
||||
let _ = handle_response(
|
||||
target.remote_name,
|
||||
retry(must_stop_processing, || {
|
||||
let request = agent.patch(&url);
|
||||
let mut request = set_network_ureq_headers(
|
||||
request,
|
||||
&ImportData {
|
||||
remote_name: export_old_remote_name.to_string(),
|
||||
index_name: None,
|
||||
document_count: 0,
|
||||
},
|
||||
network_change_origin,
|
||||
&ImportMetadata {
|
||||
index_count: 0,
|
||||
task_key: None,
|
||||
total_index_documents: 0,
|
||||
},
|
||||
);
|
||||
request = request.set("Content-Type", "application/json");
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request
|
||||
.send_json(
|
||||
// empty payload that will be disregarded
|
||||
serde_json::Value::Object(Default::default()),
|
||||
)
|
||||
.map_err(into_backoff_error)
|
||||
}),
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn set_network_ureq_headers(
    request: ureq::Request,
    import_data: &ImportData,
    origin: &Origin,
    metadata: &ImportMetadata,
) -> ureq::Request {
    let request = RequestWrapper(request);

    let ImportMetadata { index_count, task_key, total_index_documents } = metadata;
    let Origin { remote_name: origin_remote, task_uid, network_version } = origin;
    let ImportData { remote_name: import_remote, index_name, document_count } = import_data;

    let request = request
        .set_origin_remote(origin_remote)
        .set_origin_task_uid(*task_uid)
        .set_origin_network_version(*network_version)
        .set_import_remote(import_remote)
        .set_import_docs(*document_count)
        .set_import_index_count(*index_count)
        .set_import_index_docs(*total_index_documents);

    let request = if let Some(index_name) = index_name.as_deref() {
        request.set_import_index(index_name)
    } else {
        request
    };
    let RequestWrapper(request) = if let Some(task_key) = task_key {
        request.set_import_task_key(*task_key)
    } else {
        request
    };

    request
}
|
||||
|
||||
struct RequestWrapper(ureq::Request);
impl headers::SetHeader for RequestWrapper {
    fn set_header(self, name: &str, value: &str) -> Self {
        Self(self.0.set(name, value))
    }
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
fn send_buffer<'a>(
    buffer: &'a [u8],
    mut compressed_buffer: &'a mut Vec<u8>,
    must_stop_processing: &MustStopProcessing,
    agent: &ureq::Agent,
    documents_url: &'a str,
    remote_name: Option<&str>,
    bearer: Option<&'a str>,
    task_network: Option<&(ImportData, Origin, ImportMetadata)>,
    err: &'a impl Fn(milli::Error) -> crate::Error,
) -> Result<ControlFlow<(), ()>> {
    // We compress the documents before sending them
    let mut encoder: GzEncoder<&mut &mut Vec<u8>> =
        GzEncoder::new(&mut compressed_buffer, Compression::default());
    encoder.write_all(buffer).map_err(milli::Error::from).map_err(err)?;
    encoder.finish().map_err(milli::Error::from).map_err(err)?;

    let res = retry(must_stop_processing, || {
        let mut request = agent.post(documents_url);
        request = request.set("Content-Type", "application/x-ndjson");
        request = request.set("Content-Encoding", "gzip");
        if let Some(bearer) = bearer {
            request = request.set("Authorization", bearer);
        }
        if let Some((import_data, origin, metadata)) = task_network {
            request = set_network_ureq_headers(request, import_data, origin, metadata);
        }
        request.send_bytes(compressed_buffer).map_err(into_backoff_error)
    });

    handle_response(remote_name, res)
}
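For orientation, the call sites earlier in this diff use `send_buffer` roughly as sketched below (a sketch only: `buffer`, `compressed_buffer`, `documents_url`, `task_network`, `ctx`, `target` and `err` are the locals and error closure defined in the export loop above); a `Break` answer means the remote refuses further documents for this network task, so the sender stops:

let control_flow = send_buffer(
    &buffer,
    &mut compressed_buffer,
    ctx.must_stop_processing,
    ctx.agent,
    &documents_url,
    target.remote_name,
    bearer.as_deref(),
    task_network.as_ref(),
    &err,
)?;
buffer.clear();
compressed_buffer.clear();
if control_flow.is_break() {
    // the remote opted out of this network task: stop streaming documents to it
    return Ok(());
}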
|
||||
|
||||
fn handle_response(remote_name: Option<&str>, res: Result<Response>) -> Result<ControlFlow<()>> {
    let remote_name = remote_name.unwrap_or("unnamed");
    match res {
        Ok(_response) => Ok(ControlFlow::Continue(())),
        Err(Error::FromRemoteWhenExporting { code, .. })
            if code == Code::ImportTaskAlreadyReceived.name() =>
        {
            Ok(ControlFlow::Continue(()))
        }
        Err(Error::FromRemoteWhenExporting { code, message, .. })
            if code == Code::ImportTaskUnknownRemote.name() =>
        {
            tracing::warn!("remote `{remote_name}` answered with: {message}");
            Ok(ControlFlow::Break(()))
        }
        // note: there have already been many attempts to get this due to exponential backoff
        Err(Error::FromRemoteWhenExporting { code, message, .. })
            if code == Code::ImportTaskWithoutNetworkTask.name() =>
        {
            tracing::warn!("remote `{remote_name}` answered with: {message}");
            Ok(ControlFlow::Break(()))
        }
        Err(e) => {
            tracing::warn!("error while exporting: {e}");
            Err(e)
        }
    }
}
|
||||
|
||||
fn retry<F>(must_stop_processing: &MustStopProcessing, send_request: F) -> Result<ureq::Response>
|
||||
@@ -374,4 +593,65 @@ fn ureq_error_into_error(error: ureq::Error) -> Error {
|
||||
}
|
||||
}
|
||||
|
||||
// export_one_index arguments
pub(super) struct TargetInstance<'a> {
    pub(super) remote_name: Option<&'a str>,
    pub(super) base_url: &'a str,
    pub(super) api_key: Option<&'a str>,
}

pub(super) struct ExportOptions<'a> {
    pub(super) index_uid: &'a str,
    pub(super) payload_size: Option<&'a Byte>,
    pub(super) override_settings: bool,
    pub(super) export_mode: ExportMode<'a>,
}
|
||||
|
||||
impl ExportOptions<'_> {
    fn task_network(
        &self,
        total_index_documents: u64,
    ) -> Option<(ImportData, Origin, ImportMetadata)> {
        if let ExportMode::NetworkBalancing {
            index_count,
            export_old_remote_name,
            network_change_origin,
        } = self.export_mode
        {
            Some((
                ImportData {
                    remote_name: export_old_remote_name.to_string(),
                    index_name: Some(self.index_uid.to_string()),
                    document_count: 0,
                },
                network_change_origin.clone(),
                ImportMetadata { index_count, task_key: None, total_index_documents },
            ))
        } else {
            None
        }
    }
}
|
||||
|
||||
pub(super) struct ExportContext<'a> {
    pub(super) index: &'a meilisearch_types::milli::Index,
    pub(super) index_rtxn: &'a milli::heed::RoTxn<'a>,
    pub(super) universe: &'a RoaringBitmap,
    pub(super) progress: &'a Progress,
    pub(super) agent: &'a ureq::Agent,
    pub(super) must_stop_processing: &'a MustStopProcessing,
}

pub(super) enum ExportMode<'a> {
    ExportRoute,
    #[cfg_attr(not(feature = "enterprise"), allow(dead_code))]
    NetworkBalancing {
        index_count: u64,

        export_old_remote_name: &'a str,
        network_change_origin: &'a Origin,
    },
}
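Taken together, `TargetInstance`, `ExportOptions` and `ExportContext` bundle everything `export_one_index` needs; a sketch of how the plain export route wires them up, mirroring the handler shown earlier in this diff (`base_url`, `api_key`, `uid`, `payload_size`, `override_settings` and the index handles are the handler's own locals):

// Sketch of the export-route call site, as seen in the task handler above.
let target = TargetInstance { remote_name: None, base_url, api_key };
let options = ExportOptions {
    index_uid: uid,
    payload_size,
    override_settings: *override_settings,
    export_mode: ExportMode::ExportRoute,
};
let ctx = ExportContext {
    index: &index,
    index_rtxn: &index_rtxn,
    universe: &universe,
    progress: &progress,
    agent: &agent,
    must_stop_processing: &must_stop_processing,
};
let total_documents = self.export_one_index(target, options, ctx)?;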
|
||||
|
||||
// progress related
|
||||
enum ExportIndex {}
|
||||
|
||||
@@ -438,12 +438,15 @@ async fn multipart_stream_to_s3(
|
||||
db_name: String,
|
||||
reader: std::io::PipeReader,
|
||||
) -> Result<(), Error> {
|
||||
use std::{collections::VecDeque, os::fd::OwnedFd, path::PathBuf};
|
||||
use std::collections::VecDeque;
|
||||
use std::io;
|
||||
use std::os::fd::OwnedFd;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use reqwest::{Client, Response};
|
||||
use rusty_s3::S3Action as _;
|
||||
use rusty_s3::{actions::CreateMultipartUpload, Bucket, BucketError, Credentials, UrlStyle};
|
||||
use rusty_s3::actions::CreateMultipartUpload;
|
||||
use rusty_s3::{Bucket, BucketError, Credentials, S3Action as _, UrlStyle};
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
let reader = OwnedFd::from(reader);
|
||||
@@ -517,7 +520,6 @@ async fn multipart_stream_to_s3(
|
||||
while buffer.len() < (s3_multipart_part_size as usize / 2) {
|
||||
// Wait for the pipe to be readable
|
||||
|
||||
use std::io;
|
||||
reader.readable().await?;
|
||||
|
||||
match reader.try_read_buf(&mut buffer) {
|
||||
@@ -581,15 +583,17 @@ async fn multipart_stream_to_s3(
|
||||
async move {
|
||||
match client.post(url).body(body).send().await {
|
||||
Ok(resp) if resp.status().is_client_error() => {
|
||||
resp.error_for_status().map_err(backoff::Error::Permanent)
|
||||
Err(backoff::Error::Permanent(Error::S3Error {
|
||||
status: resp.status(),
|
||||
body: resp.text().await.unwrap_or_default(),
|
||||
}))
|
||||
}
|
||||
Ok(resp) => Ok(resp),
|
||||
Err(e) => Err(backoff::Error::transient(e)),
|
||||
Err(e) => Err(backoff::Error::transient(Error::S3HttpError(e))),
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(Error::S3HttpError)?;
|
||||
.await?;
|
||||
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
@@ -37,7 +37,7 @@ catto [1,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
----------------------------------------------------------------------
|
||||
@@ -40,7 +40,7 @@ doggo [2,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -43,7 +43,7 @@ doggo [2,3,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -747,6 +747,7 @@ fn basic_get_stats() {
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"networkTopologyChange": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
@@ -782,6 +783,7 @@ fn basic_get_stats() {
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"networkTopologyChange": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
@@ -824,6 +826,7 @@ fn basic_get_stats() {
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"networkTopologyChange": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
@@ -867,6 +870,7 @@ fn basic_get_stats() {
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"networkTopologyChange": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
|
||||
@@ -112,6 +112,7 @@ impl IndexScheduler {
|
||||
max_number_of_batched_tasks: usize::MAX,
|
||||
batched_tasks_size_limit: u64::MAX,
|
||||
instance_features: Default::default(),
|
||||
export_default_payload_size_bytes: byte_unit::Byte::parse_str("20MiB", false).unwrap(),
|
||||
auto_upgrade: true, // Don't cost much and will ensure the happy path works
|
||||
embedding_cache_cap: 10,
|
||||
experimental_no_snapshot_compaction: false,
|
||||
|
||||
@@ -1,89 +1,93 @@
|
||||
use anyhow::bail;
|
||||
use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
|
||||
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
||||
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use meilisearch_types::versioning;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::info;
|
||||
|
||||
use crate::queue::TaskQueue;
|
||||
use crate::versioning::Versioning;
|
||||
|
||||
mod v1_29;
|
||||
mod v1_30;
|
||||
trait UpgradeIndexScheduler {
|
||||
fn upgrade(
|
||||
&self,
|
||||
env: &Env<WithoutTls>,
|
||||
wtxn: &mut RwTxn,
|
||||
original: (u32, u32, u32),
|
||||
) -> anyhow::Result<()>;
|
||||
fn target_version(&self) -> (u32, u32, u32);
|
||||
fn upgrade(&self, env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> anyhow::Result<()>;
|
||||
/// Whether the migration should be applied, depending on the initial version of the index scheduler before
|
||||
/// any migration was applied
|
||||
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
|
||||
/// A progress-centric description of the migration
|
||||
fn description(&self) -> &'static str;
|
||||
}
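A minimal sketch of a migration implementing this trait shape (the type name and version bound below are illustrative, not part of the diff): `must_upgrade` is keyed on the version recorded before any migration ran, and `description` feeds the progress logs in `upgrade_index_scheduler` below.

// Illustrative no-op migration, assuming the new trait shape above.
struct ExampleNoOpMigration;

impl UpgradeIndexScheduler for ExampleNoOpMigration {
    fn upgrade(&self, _env: &Env<WithoutTls>, _wtxn: &mut RwTxn) -> anyhow::Result<()> {
        // nothing to rewrite in this example
        Ok(())
    }

    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
        // only run when the scheduler predates the (hypothetical) 1.30 layout
        initial_version < (1, 30, 0)
    }

    fn description(&self) -> &'static str {
        "example no-op migration"
    }
}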
|
||||
|
||||
/// Upgrade the index scheduler to the binary version.
|
||||
///
|
||||
/// # Warning
|
||||
///
|
||||
/// The current implementation uses a single wtxn to the index scheduler for the whole duration of the upgrade.
|
||||
/// If migrations start taking a long time, they might prevent tasks from being registered.
|
||||
/// If this issue manifests, then it can be mitigated by adding a `fn target_version` to `UpgradeIndexScheduler`,
|
||||
/// to be able to write intermediate versions and drop the wtxn between applying migrations.
|
||||
pub fn upgrade_index_scheduler(
|
||||
env: &Env<WithoutTls>,
|
||||
versioning: &Versioning,
|
||||
from: (u32, u32, u32),
|
||||
to: (u32, u32, u32),
|
||||
initial_version: (u32, u32, u32),
|
||||
) -> anyhow::Result<()> {
|
||||
let current_major = to.0;
|
||||
let current_minor = to.1;
|
||||
let current_patch = to.2;
|
||||
let target_major: u32 = versioning::VERSION_MAJOR;
|
||||
let target_minor: u32 = versioning::VERSION_MINOR;
|
||||
let target_patch: u32 = versioning::VERSION_PATCH;
|
||||
let target_version = (target_major, target_minor, target_patch);
|
||||
|
||||
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
|
||||
// This is the last upgrade function; it will be called when the index is up to date.
// Any other upgrade function should be added before this one.
|
||||
&ToCurrentNoOp {},
|
||||
];
|
||||
|
||||
let start = match from {
|
||||
(1, 12, _) => 0,
|
||||
(1, 13, _) => 0,
|
||||
(1, 14, _) => 0,
|
||||
(1, 15, _) => 0,
|
||||
(1, 16, _) => 0,
|
||||
(1, 17, _) => 0,
|
||||
(1, 18, _) => 0,
|
||||
(1, 19, _) => 0,
|
||||
(1, 20, _) => 0,
|
||||
(1, 21, _) => 0,
|
||||
(1, 22, _) => 0,
|
||||
(1, 23, _) => 0,
|
||||
(1, 24, _) => 0,
|
||||
(1, 25, _) => 0,
|
||||
(1, 26, _) => 0,
|
||||
(major, minor, patch) => {
|
||||
if major > current_major
|
||||
|| (major == current_major && minor > current_minor)
|
||||
|| (major == current_major && minor == current_minor && patch > current_patch)
|
||||
{
|
||||
bail!(
|
||||
"Database version {major}.{minor}.{patch} is higher than the Meilisearch version {current_major}.{current_minor}.{current_patch}. Downgrade is not supported",
|
||||
);
|
||||
} else if major < 1 || (major == current_major && minor < 12) {
|
||||
bail!(
|
||||
"Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{current_major}.{current_minor}.{current_patch}",
|
||||
);
|
||||
} else {
|
||||
bail!("Unknown database version: v{major}.{minor}.{patch}");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
info!("Upgrading the task queue");
|
||||
let mut local_from = from;
|
||||
for upgrade in upgrade_functions[start..].iter() {
|
||||
let target = upgrade.target_version();
|
||||
info!(
|
||||
"Upgrading from v{}.{}.{} to v{}.{}.{}",
|
||||
local_from.0, local_from.1, local_from.2, target.0, target.1, target.2
|
||||
);
|
||||
let mut wtxn = env.write_txn()?;
|
||||
upgrade.upgrade(env, &mut wtxn, local_from)?;
|
||||
versioning.set_version(&mut wtxn, target)?;
|
||||
wtxn.commit()?;
|
||||
local_from = target;
|
||||
if initial_version == target_version {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
|
||||
// List all upgrade functions to apply in order here.
|
||||
&v1_30::MigrateNetwork,
|
||||
];
|
||||
|
||||
let (initial_major, initial_minor, initial_patch) = initial_version;
|
||||
|
||||
if initial_version > target_version {
|
||||
bail!(
|
||||
"Database version {initial_major}.{initial_minor}.{initial_patch} is higher than the Meilisearch version {target_major}.{target_minor}.{target_patch}. Downgrade is not supported",
|
||||
);
|
||||
}
|
||||
|
||||
if initial_version < (1, 12, 0) {
|
||||
bail!(
|
||||
"Database version {initial_major}.{initial_minor}.{initial_patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{initial_major}.{initial_minor}.{initial_patch} and import it in the v{target_major}.{target_minor}.{target_patch}",
|
||||
);
|
||||
}
|
||||
|
||||
info!("Upgrading the task queue");
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let migration_count = upgrade_functions.len();
|
||||
for (migration_index, upgrade) in upgrade_functions.iter().enumerate() {
|
||||
if upgrade.must_upgrade(initial_version) {
|
||||
info!(
|
||||
"[{migration_index}/{migration_count}]Applying migration: {}",
|
||||
upgrade.description()
|
||||
);
|
||||
|
||||
upgrade.upgrade(env, &mut wtxn)?;
|
||||
|
||||
info!(
|
||||
"[{}/{migration_count}]Migration applied: {}",
|
||||
migration_index + 1,
|
||||
upgrade.description()
|
||||
)
|
||||
} else {
|
||||
info!(
|
||||
"[{migration_index}/{migration_count}]Skipping unnecessary migration: {}",
|
||||
upgrade.description()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
versioning.set_version(&mut wtxn, target_version)?;
|
||||
info!("Task queue upgraded, spawning the upgrade database task");
|
||||
|
||||
let queue = TaskQueue::new(env, &mut wtxn)?;
|
||||
let uid = queue.next_task_id(&wtxn)?;
|
||||
queue.register(
|
||||
@@ -96,9 +100,9 @@ pub fn upgrade_index_scheduler(
|
||||
finished_at: None,
|
||||
error: None,
|
||||
canceled_by: None,
|
||||
details: Some(Details::UpgradeDatabase { from, to }),
|
||||
details: Some(Details::UpgradeDatabase { from: initial_version, to: target_version }),
|
||||
status: Status::Enqueued,
|
||||
kind: KindWithContent::UpgradeDatabase { from },
|
||||
kind: KindWithContent::UpgradeDatabase { from: initial_version },
|
||||
network: None,
|
||||
custom_metadata: None,
|
||||
},
|
||||
@@ -107,21 +111,3 @@ pub fn upgrade_index_scheduler(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
struct ToCurrentNoOp {}
|
||||
|
||||
impl UpgradeIndexScheduler for ToCurrentNoOp {
|
||||
fn upgrade(
|
||||
&self,
|
||||
_env: &Env<WithoutTls>,
|
||||
_wtxn: &mut RwTxn,
|
||||
_original: (u32, u32, u32),
|
||||
) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn target_version(&self) -> (u32, u32, u32) {
|
||||
(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
|
||||
}
|
||||
}
|
||||
|
||||
crates/index-scheduler/src/upgrade/v1_29.rs (new file, 47 lines)
@@ -0,0 +1,47 @@
|
||||
use std::collections::BTreeMap;

use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Env, RoTxn, WithoutTls};
use serde::{Deserialize, Serialize};

use crate::Result;

/// Database const names for the `FeatureData`.
mod db_name {
    pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
}

mod db_keys {
    pub const NETWORK: &str = "network";
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
    #[serde(default, rename = "self")]
    pub local: Option<String>,
    #[serde(default)]
    pub remotes: BTreeMap<String, Remote>,
    #[serde(default)]
    pub sharding: bool,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
    pub url: String,
    #[serde(default)]
    pub search_api_key: Option<String>,
    #[serde(default)]
    pub write_api_key: Option<String>,
}

pub fn get_network(env: &Env<WithoutTls>, rtxn: &RoTxn) -> Result<Option<Network>> {
    let Some(network_db) =
        env.open_database::<Str, SerdeJson<Network>>(rtxn, Some(db_name::EXPERIMENTAL_FEATURES))?
    else {
        return Ok(None);
    };

    Ok(network_db.get(rtxn, db_keys::NETWORK)?)
}
|
||||
crates/index-scheduler/src/upgrade/v1_30.rs (new file, 82 lines)
@@ -0,0 +1,82 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
pub local: Option<String>,
|
||||
#[serde(default)]
|
||||
pub remotes: BTreeMap<String, Remote>,
|
||||
#[serde(default)]
|
||||
pub leader: Option<String>,
|
||||
#[serde(default)]
|
||||
pub version: Uuid,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
pub url: String,
|
||||
#[serde(default)]
|
||||
pub search_api_key: Option<String>,
|
||||
#[serde(default)]
|
||||
pub write_api_key: Option<String>,
|
||||
}
|
||||
|
||||
use super::v1_29;
|
||||
use crate::Result;
|
||||
|
||||
/// Database const names for the `FeatureData`.
|
||||
mod db_name {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
}
|
||||
|
||||
mod db_keys {
|
||||
pub const NETWORK: &str = "network";
|
||||
}
|
||||
|
||||
pub struct MigrateNetwork;
|
||||
|
||||
impl super::UpgradeIndexScheduler for MigrateNetwork {
|
||||
fn upgrade(&self, env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> anyhow::Result<()> {
|
||||
let Some(v1_29::Network { local, remotes, sharding }) = v1_29::get_network(env, wtxn)?
|
||||
else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let leader = if sharding { remotes.keys().next().cloned() } else { None };
|
||||
|
||||
let remotes = remotes
|
||||
.into_iter()
|
||||
.map(|(name, v1_29::Remote { url, search_api_key, write_api_key })| {
|
||||
(name, Remote { url, search_api_key, write_api_key })
|
||||
})
|
||||
.collect();
|
||||
|
||||
let network = Network { local, remotes, leader, version: Uuid::now_v7() };
|
||||
|
||||
set_network(env, wtxn, &network)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
|
||||
initial_version < (1, 30, 0)
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"updating the network struct"
|
||||
}
|
||||
}
|
||||
|
||||
fn set_network(env: &Env<WithoutTls>, wtxn: &mut RwTxn<'_>, network: &Network) -> Result<()> {
|
||||
let network_db =
|
||||
env.create_database::<Str, SerdeJson<Network>>(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;
|
||||
|
||||
network_db.put(wtxn, db_keys::NETWORK, network)?;
|
||||
Ok(())
|
||||
}
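As a hedged illustration of what `MigrateNetwork` does (the values below are hypothetical, not taken from the diff): a v1.29 network with sharding enabled keeps its remotes, gains a `leader` picked as the first remote key in `BTreeMap` order plus a fresh UUIDv7 `version`, and loses the `sharding` flag; with sharding disabled, the leader is simply `None`.

// Hypothetical input, in the v1.29 layout:
let before = v1_29::Network {
    local: Some("ms-0".to_string()),
    remotes: BTreeMap::from([(
        "ms-1".to_string(),
        v1_29::Remote {
            url: "http://ms-1:7700".to_string(),
            search_api_key: None,
            write_api_key: None,
        },
    )]),
    sharding: true,
};
// After MigrateNetwork::upgrade, the stored value is equivalent to:
// Network {
//     local: Some("ms-0"),
//     remotes: { "ms-1": Remote { url: "http://ms-1:7700", .. } },
//     leader: Some("ms-1"), // first remote key, only because sharding was true
//     version: <freshly generated UUIDv7>,
// }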
|
||||
@@ -4,9 +4,11 @@ use std::collections::{BTreeSet, HashSet};
|
||||
use std::ops::Bound;
|
||||
use std::sync::Arc;
|
||||
|
||||
use convert_case::{Case, Casing as _};
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
|
||||
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::CboRoaringBitmapCodec;
|
||||
use meilisearch_types::milli::progress::Progress;
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, ChannelCongestion};
|
||||
use meilisearch_types::task_view::DetailsView;
|
||||
use meilisearch_types::tasks::{
|
||||
BatchStopReason, Details, IndexSwap, Kind, KindWithContent, Status,
|
||||
@@ -119,17 +121,8 @@ impl ProcessingBatch {
|
||||
self.stats.total_nb_tasks = 0;
|
||||
}
|
||||
|
||||
/// Update the timestamp of the tasks and the inner structure of this structure.
|
||||
pub fn update(&mut self, task: &mut Task) {
|
||||
// We must re-set this value in case we're dealing with a task that has been added between
|
||||
// the `processing` and `finished` state
|
||||
// We must re-set this value in case we're dealing with a task that has been added between
|
||||
// the `processing` and `finished` state or that failed.
|
||||
task.batch_uid = Some(self.uid);
|
||||
// Same
|
||||
task.started_at = Some(self.started_at);
|
||||
task.finished_at = self.finished_at;
|
||||
|
||||
/// Update batch task from a processed task
|
||||
pub fn update_from_task(&mut self, task: &Task) {
|
||||
self.statuses.insert(task.status);
|
||||
|
||||
// Craft an aggregation of the details of all the tasks encountered in this batch.
|
||||
@@ -144,6 +137,63 @@ impl ProcessingBatch {
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the timestamp of the tasks after they're done
|
||||
pub fn finish_task(&self, task: &mut Task) {
|
||||
// We must re-set this value in case we're dealing with a task that has been added between
|
||||
// the `processing` and `finished` state or that failed.
|
||||
task.batch_uid = Some(self.uid);
|
||||
// Same
|
||||
task.started_at = Some(self.started_at);
|
||||
task.finished_at = self.finished_at;
|
||||
}
|
||||
|
||||
pub fn write_stats(
|
||||
&mut self,
|
||||
progress: &Progress,
|
||||
congestion: Option<ChannelCongestion>,
|
||||
pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
|
||||
post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
|
||||
) {
|
||||
self.stats.progress_trace =
|
||||
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
|
||||
self.stats.write_channel_congestion = congestion.map(|congestion| {
|
||||
let mut congestion_info = serde_json::Map::new();
|
||||
congestion_info.insert("attempts".into(), congestion.attempts.into());
|
||||
congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
|
||||
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
|
||||
congestion_info
|
||||
});
|
||||
self.stats.internal_database_sizes = pre_commit_dabases_sizes
|
||||
.iter()
|
||||
.flat_map(|(dbname, pre_size)| {
|
||||
post_commit_dabases_sizes
|
||||
.get(dbname)
|
||||
.map(|post_size| {
|
||||
use std::cmp::Ordering::{Equal, Greater, Less};
|
||||
|
||||
use byte_unit::Byte;
|
||||
use byte_unit::UnitType::Binary;
|
||||
|
||||
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
|
||||
let diff_size = post_size.abs_diff(*pre_size) as u64;
|
||||
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
|
||||
let sign = match post_size.cmp(pre_size) {
|
||||
Equal => return None,
|
||||
Greater => "+",
|
||||
Less => "-",
|
||||
};
|
||||
|
||||
Some((
|
||||
dbname.to_case(Case::Camel),
|
||||
format!("{post:#.2} ({sign}{diff:#.2})").into(),
|
||||
))
|
||||
})
|
||||
.into_iter()
|
||||
.flatten()
|
||||
})
|
||||
.collect();
|
||||
}
|
||||
|
||||
pub fn to_batch(&self) -> Batch {
|
||||
Batch {
|
||||
uid: self.uid,
|
||||
@@ -286,6 +336,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
|
||||
| K::DumpCreation { .. }
|
||||
| K::Export { .. }
|
||||
| K::UpgradeDatabase { .. }
|
||||
| K::NetworkTopologyChange(_)
|
||||
| K::SnapshotCreation => (),
|
||||
};
|
||||
if let Some(Details::IndexSwap { swaps }) = &mut task.details {
|
||||
@@ -627,6 +678,9 @@ impl crate::IndexScheduler {
|
||||
} => {
|
||||
assert_eq!(kind.as_kind(), Kind::IndexCompaction);
|
||||
}
|
||||
Details::NetworkTopologyChange { moved_documents: _, message: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::NetworkTopologyChange);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -64,14 +64,7 @@ impl Versioning {
|
||||
};
|
||||
wtxn.commit()?;
|
||||
|
||||
let bin_major: u32 = versioning::VERSION_MAJOR;
|
||||
let bin_minor: u32 = versioning::VERSION_MINOR;
|
||||
let bin_patch: u32 = versioning::VERSION_PATCH;
|
||||
let to = (bin_major, bin_minor, bin_patch);
|
||||
|
||||
if from != to {
|
||||
upgrade_index_scheduler(env, &this, from, to)?;
|
||||
}
|
||||
upgrade_index_scheduler(env, &this, from)?;
|
||||
|
||||
// Once we reach this point it means the upgrade process, if there was one is entirely finished
|
||||
// we can safely say we reached the latest version of the index scheduler
|
||||
|
||||
@@ -15,7 +15,7 @@ license.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.6.0"
|
||||
criterion = "0.7.0"
|
||||
|
||||
[[bench]]
|
||||
name = "depth"
|
||||
|
||||
@@ -13,7 +13,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["json", "redactions"] }
|
||||
md5 = "0.7.0"
|
||||
md5 = "0.8.0"
|
||||
once_cell = "1.21"
|
||||
regex-lite = "0.1.6"
|
||||
uuid = { version = "1.17.0", features = ["v4"] }
|
||||
regex-lite = "0.1.8"
|
||||
uuid = { version = "1.18.1", features = ["v4"] }
|
||||
|
||||
@@ -12,15 +12,15 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.22.1"
|
||||
enum-iterator = "2.1.0"
|
||||
enum-iterator = "2.3.0"
|
||||
hmac = "0.12.1"
|
||||
maplit = "1.0.2"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
rand = "0.8.5"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
sha2 = "0.10.9"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
@@ -11,38 +11,41 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
actix-web = { version = "4.11.0", default-features = false }
|
||||
anyhow = "1.0.98"
|
||||
bumpalo = "3.18.1"
|
||||
actix-web = { version = "4.12.0", default-features = false }
|
||||
anyhow = "1.0.100"
|
||||
base64 = "0.22.1"
|
||||
bumpalo = "3.19.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
convert_case = "0.8.0"
|
||||
csv = "1.3.1"
|
||||
deserr = { version = "0.6.3", features = ["actix-web"] }
|
||||
convert_case = "0.9.0"
|
||||
csv = "1.4.0"
|
||||
deserr = { version = "0.6.4", features = ["actix-web"] }
|
||||
either = { version = "1.15.0", features = ["serde"] }
|
||||
enum-iterator = "2.1.0"
|
||||
enum-iterator = "2.3.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.2"
|
||||
flate2 = "1.1.5"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.9.7"
|
||||
itertools = "0.14.0"
|
||||
memmap2 = "0.9.9"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
rustc-hash = "2.1.1"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde-cs = "0.2.4"
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = "1.45"
|
||||
tokio = "1.48"
|
||||
urlencoding = "2.1.3"
|
||||
utoipa = { version = "5.4.0", features = ["macros"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
@@ -56,6 +59,9 @@ all-tokenizations = ["milli/all-tokenizations"]
|
||||
# chinese specialized tokenization
|
||||
chinese = ["milli/chinese"]
|
||||
chinese-pinyin = ["milli/chinese-pinyin"]
|
||||
|
||||
enterprise = ["milli/enterprise"]
|
||||
|
||||
# hebrew specialized tokenization
|
||||
hebrew = ["milli/hebrew"]
|
||||
# japanese specialized tokenization
|
||||
|
||||
crates/meilisearch-types/src/community_edition.rs (new file, 16 lines)
@@ -0,0 +1,16 @@
|
||||
pub mod network {
    use milli::update::new::indexer::current_edition::sharding::Shards;

    use crate::network::Network;

    impl Network {
        pub fn shards(&self) -> Option<Shards> {
            None
        }

        pub fn sharding(&self) -> bool {
            // always false in CE
            false
        }
    }
}
|
||||
@@ -3,45 +3,23 @@
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>

use std::collections::BTreeMap;

use milli::update::new::indexer::enterprise_edition::sharding::Shards;
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
#[serde(default, rename = "self")]
pub local: Option<String>,
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
#[serde(default)]
pub sharding: bool,
}
use crate::network::Network;

impl Network {
pub fn shards(&self) -> Option<Shards> {
if self.sharding {
let this = self.local.as_deref().expect("Inconsistent `sharding` and `self`");
let others = self
.remotes
.keys()
.filter(|name| name.as_str() != this)
.map(|name| name.to_owned())
.collect();
Some(Shards { own: vec![this.to_owned()], others })
if self.sharding() {
Some(Shards::from_remotes_local(
self.remotes.keys().map(String::as_str),
self.local.as_deref(),
))
} else {
None
}
}
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
pub url: String,
#[serde(default)]
pub search_api_key: Option<String>,
#[serde(default)]
pub write_api_key: Option<String>,
pub fn sharding(&self) -> bool {
self.leader.is_some()
}
}

@@ -156,7 +156,7 @@ macro_rules! make_error_codes {
|
||||
}
|
||||
|
||||
/// return error name, used as error code
|
||||
fn name(&self) -> String {
|
||||
pub fn name(&self) -> String {
|
||||
match self {
|
||||
$(
|
||||
Code::$code_ident => stringify!($code_ident).to_case(convert_case::Case::Snake)
|
||||
@@ -214,6 +214,9 @@ ImmutableApiKeyUid , InvalidRequest , BAD_REQU
|
||||
ImmutableApiKeyUpdatedAt , InvalidRequest , BAD_REQUEST;
|
||||
ImmutableIndexCreatedAt , InvalidRequest , BAD_REQUEST;
|
||||
ImmutableIndexUpdatedAt , InvalidRequest , BAD_REQUEST;
|
||||
ImportTaskAlreadyReceived , InvalidRequest , PRECONDITION_FAILED;
|
||||
ImportTaskUnknownRemote , InvalidRequest , PRECONDITION_FAILED;
|
||||
ImportTaskWithoutNetworkTask , InvalidRequest , SERVICE_UNAVAILABLE;
|
||||
IndexAlreadyExists , InvalidRequest , CONFLICT ;
|
||||
IndexCreationFailed , Internal , INTERNAL_SERVER_ERROR;
|
||||
IndexNotFound , InvalidRequest , NOT_FOUND;
|
||||
@@ -270,9 +273,9 @@ InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQU
|
||||
InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkLeader , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSharding , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkWriteApiKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -377,7 +380,9 @@ MissingPayload , InvalidRequest , BAD_REQU
|
||||
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
|
||||
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
|
||||
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
|
||||
NetworkVersionMismatch , InvalidRequest , PRECONDITION_FAILED ;
|
||||
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
|
||||
NotLeader , InvalidRequest , BAD_REQUEST ;
|
||||
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
|
||||
RemoteBadResponse , System , BAD_GATEWAY ;
|
||||
RemoteBadRequest , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -391,6 +396,9 @@ TaskFileNotFound , InvalidRequest , NOT_FOUN
|
||||
BatchNotFound , InvalidRequest , NOT_FOUND ;
|
||||
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
|
||||
TooManyVectors , InvalidRequest , BAD_REQUEST ;
|
||||
UnexpectedNetworkPreviousRemotes , InvalidRequest , BAD_REQUEST ;
|
||||
NetworkVersionTooOld , InvalidRequest , BAD_REQUEST ;
|
||||
UnprocessedNetworkTask , InvalidRequest , BAD_REQUEST ;
|
||||
UnretrievableDocument , Internal , BAD_REQUEST ;
|
||||
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
|
||||
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
|
||||
@@ -433,6 +441,7 @@ InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQU
|
||||
InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
RequiresEnterpriseEdition , InvalidRequest , UNAVAILABLE_FOR_LEGAL_REASONS ;
|
||||
// Webhooks
|
||||
InvalidWebhooks , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidWebhookUrl , InvalidRequest , BAD_REQUEST ;
|
||||
|
||||
@@ -2,10 +2,17 @@

pub mod batch_view;
pub mod batches;
#[cfg(not(feature = "enterprise"))]
pub mod community_edition;
pub mod compression;
pub mod deserr;
pub mod document_formats;
#[cfg(feature = "enterprise")]
pub mod enterprise_edition;
#[cfg(not(feature = "enterprise"))]
pub use community_edition as current_edition;
#[cfg(feature = "enterprise")]
pub use enterprise_edition as current_edition;
pub mod error;
pub mod facet_values_sort;
pub mod features;
@@ -13,6 +20,7 @@ pub mod index_uid;
pub mod index_uid_pattern;
pub mod keys;
pub mod locales;
pub mod network;
pub mod settings;
pub mod star_or;
pub mod task_view;
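The cfg-gated re-export above lets the rest of the crate refer to a single `current_edition` path and have the compiled feature decide which module backs it. A minimal, self-contained sketch of the same pattern (the module names and function here are illustrative, not the crate's own):

#[cfg(not(feature = "enterprise"))]
mod community_edition {
    pub fn sharding_supported() -> bool { false }
}

#[cfg(feature = "enterprise")]
mod enterprise_edition {
    pub fn sharding_supported() -> bool { true }
}

#[cfg(not(feature = "enterprise"))]
use community_edition as current_edition;
#[cfg(feature = "enterprise")]
use enterprise_edition as current_edition;

fn main() {
    // Callers always go through `current_edition`; the feature flag picks the backing module.
    println!("sharding supported: {}", current_edition::sharding_supported());
}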
27
crates/meilisearch-types/src/network.rs
Normal file
@@ -0,0 +1,27 @@
use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};
use uuid::Uuid;

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
    #[serde(default, rename = "self")]
    pub local: Option<String>,
    #[serde(default)]
    pub remotes: BTreeMap<String, Remote>,
    #[serde(default)]
    pub leader: Option<String>,
    #[serde(default)]
    pub version: Uuid,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
    pub url: String,
    #[serde(default)]
    pub search_api_key: Option<String>,
    #[serde(default)]
    pub write_api_key: Option<String>,
}
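Since `Network` derives `Deserialize` with camelCase renaming and a `self` alias for `local`, a JSON payload maps directly onto the struct. A small sketch (the crate path is assumed from the file layout above):

use meilisearch_types::network::{Network, Remote}; // assumed path, matching the new file above

fn main() -> Result<(), serde_json::Error> {
    let payload = r#"{
        "self": "ms-0",
        "leader": "ms-0",
        "remotes": {
            "ms-1": { "url": "http://ms-1:7700", "searchApiKey": "key-1" }
        }
    }"#;
    let network: Network = serde_json::from_str(payload)?;
    assert_eq!(network.local.as_deref(), Some("ms-0"));
    assert_eq!(network.leader.as_deref(), Some("ms-0"));
    // `version` was omitted, so `#[serde(default)]` fills in the nil UUID.
    assert_eq!(network.version, uuid::Uuid::nil());
    let remote: &Remote = &network.remotes["ms-1"];
    assert_eq!(remote.search_api_key.as_deref(), Some("key-1"));
    Ok(())
}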
@@ -9,12 +9,12 @@ use utoipa::ToSchema;
|
||||
use crate::batches::BatchId;
|
||||
use crate::error::ResponseError;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::tasks::network::DbTaskNetwork;
|
||||
use crate::tasks::{
|
||||
serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
|
||||
TaskNetwork,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct TaskView {
|
||||
@@ -54,7 +54,7 @@ pub struct TaskView {
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
pub network: Option<DbTaskNetwork>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
@@ -151,6 +151,11 @@ pub struct DetailsView {
|
||||
pub pre_compaction_size: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub post_compaction_size: Option<String>,
|
||||
// network topology change
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub moved_documents: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub message: Option<String>,
|
||||
}
|
||||
|
||||
impl DetailsView {
|
||||
@@ -161,6 +166,17 @@ impl DetailsView {
|
||||
(None, Some(doc)) | (Some(doc), None) => Some(doc),
|
||||
(Some(left), Some(right)) => Some(left + right),
|
||||
},
|
||||
moved_documents: match (self.moved_documents, other.moved_documents) {
|
||||
(None, None) => None,
|
||||
(None, Some(doc)) | (Some(doc), None) => Some(doc),
|
||||
(Some(left), Some(right)) => Some(left + right),
|
||||
},
|
||||
message: match (&mut self.message, &other.message) {
|
||||
(None, None) => None,
|
||||
(None, Some(message)) => Some(message.clone()),
|
||||
(Some(message), None) => Some(std::mem::take(message)),
|
||||
(Some(message), Some(_)) => Some(std::mem::take(message)),
|
||||
},
|
||||
indexed_documents: match (self.indexed_documents, other.indexed_documents) {
|
||||
(None, None) => None,
|
||||
(None, Some(None)) | (Some(None), None) | (Some(None), Some(None)) => Some(None),
|
||||
@@ -451,6 +467,11 @@ impl From<Details> for DetailsView {
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
Details::NetworkTopologyChange { moved_documents, message } => DetailsView {
|
||||
moved_documents: Some(moved_documents),
|
||||
message: Some(message),
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,6 +23,8 @@ use crate::{versioning, InstanceUid};
|
||||
|
||||
pub type TaskId = u32;
|
||||
|
||||
pub mod network;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Task {
|
||||
@@ -44,7 +46,7 @@ pub struct Task {
|
||||
pub kind: KindWithContent,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
pub network: Option<network::DbTaskNetwork>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub custom_metadata: Option<String>,
|
||||
@@ -61,6 +63,7 @@ impl Task {
|
||||
| TaskDeletion { .. }
|
||||
| Export { .. }
|
||||
| UpgradeDatabase { .. }
|
||||
| NetworkTopologyChange { .. }
|
||||
| IndexSwap { .. } => None,
|
||||
DocumentAdditionOrUpdate { index_uid, .. }
|
||||
| DocumentEdition { index_uid, .. }
|
||||
@@ -99,6 +102,7 @@ impl Task {
|
||||
| KindWithContent::SnapshotCreation
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::NetworkTopologyChange { .. }
|
||||
| KindWithContent::IndexCompaction { .. } => None,
|
||||
}
|
||||
}
|
||||
@@ -178,6 +182,7 @@ pub enum KindWithContent {
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
},
|
||||
NetworkTopologyChange(network::NetworkTopologyChange),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
@@ -215,6 +220,7 @@ impl KindWithContent {
|
||||
KindWithContent::Export { .. } => Kind::Export,
|
||||
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
|
||||
KindWithContent::IndexCompaction { .. } => Kind::IndexCompaction,
|
||||
KindWithContent::NetworkTopologyChange { .. } => Kind::NetworkTopologyChange,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -227,6 +233,7 @@ impl KindWithContent {
|
||||
| TaskCancelation { .. }
|
||||
| TaskDeletion { .. }
|
||||
| Export { .. }
|
||||
| NetworkTopologyChange { .. }
|
||||
| UpgradeDatabase { .. } => vec![],
|
||||
DocumentAdditionOrUpdate { index_uid, .. }
|
||||
| DocumentEdition { index_uid, .. }
|
||||
@@ -340,6 +347,10 @@ impl KindWithContent {
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
KindWithContent::NetworkTopologyChange { .. } => Some(Details::NetworkTopologyChange {
|
||||
moved_documents: 0,
|
||||
message: "processing tasks for previous network versions".into(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -392,7 +403,7 @@ impl KindWithContent {
|
||||
})
|
||||
}
|
||||
KindWithContent::IndexSwap { .. } => {
|
||||
todo!()
|
||||
unimplemented!("do not call `default_finished_details` for `IndexSwap` tasks")
|
||||
}
|
||||
KindWithContent::TaskCancelation { query, tasks } => Some(Details::TaskCancelation {
|
||||
matched_tasks: tasks.len(),
|
||||
@@ -427,6 +438,9 @@ impl KindWithContent {
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
KindWithContent::NetworkTopologyChange(network_topology_change) => {
|
||||
Some(network_topology_change.to_details())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -494,6 +508,9 @@ impl From<&KindWithContent> for Option<Details> {
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
KindWithContent::NetworkTopologyChange(network_topology_change) => {
|
||||
Some(network_topology_change.to_details())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -605,6 +622,7 @@ pub enum Kind {
|
||||
Export,
|
||||
UpgradeDatabase,
|
||||
IndexCompaction,
|
||||
NetworkTopologyChange,
|
||||
}
|
||||
|
||||
impl Kind {
|
||||
@@ -624,6 +642,7 @@ impl Kind {
|
||||
| Kind::DumpCreation
|
||||
| Kind::Export
|
||||
| Kind::UpgradeDatabase
|
||||
| Kind::NetworkTopologyChange
|
||||
| Kind::SnapshotCreation => false,
|
||||
}
|
||||
}
|
||||
@@ -646,6 +665,7 @@ impl Display for Kind {
|
||||
Kind::Export => write!(f, "export"),
|
||||
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
|
||||
Kind::IndexCompaction => write!(f, "indexCompaction"),
|
||||
Kind::NetworkTopologyChange => write!(f, "networkTopologyChange"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -683,6 +703,8 @@ impl FromStr for Kind {
|
||||
Ok(Kind::UpgradeDatabase)
|
||||
} else if kind.eq_ignore_ascii_case("indexCompaction") {
|
||||
Ok(Kind::IndexCompaction)
|
||||
} else if kind.eq_ignore_ascii_case("networkTopologyChange") {
|
||||
Ok(Kind::NetworkTopologyChange)
|
||||
} else {
|
||||
Err(ParseTaskKindError(kind.to_owned()))
|
||||
}
|
||||
@@ -773,36 +795,10 @@ pub enum Details {
|
||||
pre_compaction_size: Option<Byte>,
|
||||
post_compaction_size: Option<Byte>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(untagged, rename_all = "camelCase")]
|
||||
pub enum TaskNetwork {
|
||||
Origin { origin: Origin },
|
||||
Remotes { remote_tasks: BTreeMap<String, RemoteTask> },
|
||||
}
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Origin {
|
||||
pub remote_name: String,
|
||||
pub task_uid: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct RemoteTask {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
task_uid: Option<TaskId>,
|
||||
error: Option<ResponseError>,
|
||||
}
|
||||
|
||||
impl From<Result<TaskId, ResponseError>> for RemoteTask {
|
||||
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
|
||||
match res {
|
||||
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
|
||||
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
|
||||
}
|
||||
}
|
||||
NetworkTopologyChange {
|
||||
moved_documents: u64,
|
||||
message: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
@@ -845,6 +841,9 @@ impl Details {
|
||||
| Self::Export { .. }
|
||||
| Self::UpgradeDatabase { .. }
|
||||
| Self::IndexSwap { .. } => (),
|
||||
Self::NetworkTopologyChange { moved_documents: _, message } => {
|
||||
*message = format!("Failed. Previous status: {}", message);
|
||||
}
|
||||
}
|
||||
|
||||
details
|
||||
787
crates/meilisearch-types/src/tasks/network.rs
Normal file
@@ -0,0 +1,787 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use base64::Engine as _;
|
||||
use itertools::{EitherOrBoth, Itertools as _};
|
||||
use milli::{CboRoaringBitmapCodec, DocumentId};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::network::{Network, Remote};
|
||||
use crate::tasks::{Details, TaskId};
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
mod community_edition;
|
||||
#[cfg(feature = "enterprise")]
|
||||
mod enterprise_edition;
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(untagged, rename_all = "camelCase")]
|
||||
// This type is used in the database, care should be taken when modifying it.
|
||||
pub enum DbTaskNetwork {
|
||||
/// Tasks that were duplicated from `origin`
|
||||
Origin { origin: Origin },
|
||||
/// Tasks that were duplicated as `remote_tasks`
|
||||
Remotes {
|
||||
remote_tasks: BTreeMap<String, RemoteTask>,
|
||||
#[serde(default)]
|
||||
network_version: Uuid,
|
||||
},
|
||||
/// Document import tasks sent in the context of `network_change`
|
||||
Import { import_from: ImportData, network_change: Origin },
|
||||
}
|
||||
|
||||
impl DbTaskNetwork {
|
||||
pub fn network_version(&self) -> Uuid {
|
||||
match self {
|
||||
DbTaskNetwork::Origin { origin } => origin.network_version,
|
||||
DbTaskNetwork::Remotes { remote_tasks: _, network_version } => *network_version,
|
||||
DbTaskNetwork::Import { import_from: _, network_change } => {
|
||||
network_change.network_version
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn import_data(&self) -> Option<&ImportData> {
|
||||
match self {
|
||||
DbTaskNetwork::Origin { .. } | DbTaskNetwork::Remotes { .. } => None,
|
||||
DbTaskNetwork::Import { import_from, .. } => Some(import_from),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn origin(&self) -> Option<&Origin> {
|
||||
match self {
|
||||
DbTaskNetwork::Origin { origin } => Some(origin),
|
||||
DbTaskNetwork::Remotes { .. } => None,
|
||||
DbTaskNetwork::Import { network_change, .. } => Some(network_change),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum TaskNetwork {
|
||||
/// Tasks that were duplicated from `origin`
|
||||
Origin { origin: Origin },
|
||||
/// Tasks that were duplicated as `remote_tasks`
|
||||
Remotes { remote_tasks: BTreeMap<String, RemoteTask>, network_version: Uuid },
|
||||
/// Document import tasks sent in the context of `network_change`
|
||||
Import { import_from: ImportData, network_change: Origin, metadata: ImportMetadata },
|
||||
}
|
||||
|
||||
impl TaskNetwork {
|
||||
pub fn network_version(&self) -> Uuid {
|
||||
match self {
|
||||
TaskNetwork::Origin { origin } => origin.network_version,
|
||||
TaskNetwork::Remotes { remote_tasks: _, network_version } => *network_version,
|
||||
TaskNetwork::Import { import_from: _, network_change, metadata: _ } => {
|
||||
network_change.network_version
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TaskNetwork> for DbTaskNetwork {
|
||||
fn from(value: TaskNetwork) -> Self {
|
||||
match value {
|
||||
TaskNetwork::Origin { origin } => DbTaskNetwork::Origin { origin },
|
||||
TaskNetwork::Remotes { remote_tasks, network_version } => {
|
||||
DbTaskNetwork::Remotes { remote_tasks, network_version }
|
||||
}
|
||||
TaskNetwork::Import { import_from, network_change, metadata: _ } => {
|
||||
DbTaskNetwork::Import { import_from, network_change }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Origin {
|
||||
pub remote_name: String,
|
||||
pub task_uid: u32,
|
||||
#[serde(default)]
|
||||
pub network_version: Uuid,
|
||||
}
|
||||
|
||||
/// Import data stored in a task
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ImportData {
|
||||
/// Remote that this task is imported from
|
||||
pub remote_name: String,
|
||||
/// Index relevant to this task
|
||||
pub index_name: Option<String>,
|
||||
/// Number of documents in this task
|
||||
pub document_count: u64,
|
||||
}
|
||||
|
||||
/// Import metadata associated with a task but not stored in the task
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct ImportMetadata {
|
||||
/// Total number of indexes to import from this host
|
||||
pub index_count: u64,
|
||||
/// Key unique to this (network_change, index, host, key).
|
||||
///
|
||||
/// In practice, an internal document id of one of the documents to import.
|
||||
pub task_key: Option<DocumentId>,
|
||||
/// Total number of documents to import for this index from this host.
|
||||
pub total_index_documents: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct RemoteTask {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
task_uid: Option<TaskId>,
|
||||
error: Option<ResponseError>,
|
||||
}
|
||||
|
||||
impl From<Result<TaskId, ResponseError>> for RemoteTask {
|
||||
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
|
||||
match res {
|
||||
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
|
||||
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Contains the full state of a network topology change.
///
/// A network topology change task is unique in that it can be processed in multiple batches, as its
/// resolution depends on various document addition tasks being processed first.
///
/// A network topology change task has 4 states:
///
/// 1. Processing any task that was meant for an earlier version of the network. This is necessary to
///    know that we have the right version of the documents.
/// 2. Sending all documents that must be moved to other remotes.
/// 3. Processing any task coming from the remotes.
/// 4. Finished.
///
/// Furthermore, it maintains statistics about the change, such as the number of moved documents.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct NetworkTopologyChange {
|
||||
state: NetworkTopologyState,
|
||||
// in name, `None` if the node is no longer part of the network
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
in_name: Option<String>,
|
||||
// out name, `None` if the node is new to the network
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
out_name: Option<String>,
|
||||
out_remotes: BTreeMap<String, Remote>,
|
||||
in_remotes: BTreeMap<String, InRemote>,
|
||||
stats: NetworkTopologyStats,
|
||||
}
|
||||
|
||||
impl NetworkTopologyChange {
|
||||
pub fn new(old_network: Network, new_network: Network) -> Self {
|
||||
// we use our new name as import name
|
||||
let in_name = new_network.local;
|
||||
// we use our old name as export name
|
||||
let out_name = old_network.local.or_else(|| in_name.clone());
|
||||
|
||||
// we export to the new network
|
||||
let mut out_remotes = new_network.remotes;
|
||||
// don't export to ourselves
|
||||
if let Some(in_name) = &in_name {
|
||||
out_remotes.remove(in_name);
|
||||
}
|
||||
let in_remotes = if in_name.is_some() {
|
||||
old_network
|
||||
.remotes
|
||||
.into_keys()
|
||||
.chain(out_remotes.keys().cloned())
|
||||
// don't await imports from ourselves
|
||||
.filter(|name| Some(name.as_str()) != out_name.as_deref())
|
||||
.map(|name| (name, InRemote::new()))
|
||||
.collect()
|
||||
} else {
|
||||
Default::default()
|
||||
};
|
||||
Self {
|
||||
state: NetworkTopologyState::WaitingForOlderTasks,
|
||||
in_name,
|
||||
out_name,
|
||||
out_remotes,
|
||||
in_remotes,
|
||||
stats: NetworkTopologyStats { moved_documents: 0 },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn state(&self) -> NetworkTopologyState {
|
||||
self.state
|
||||
}
|
||||
|
||||
pub fn out_name(&self) -> Option<&str> {
// at least one of `out_name` and `in_name` is expected to be defined
self.out_name.as_deref()
|
||||
}
|
||||
|
||||
pub fn in_name(&self) -> Option<&str> {
|
||||
self.in_name.as_deref()
|
||||
}
|
||||
|
||||
pub fn to_details(&self) -> Details {
|
||||
let message = match self.state {
|
||||
NetworkTopologyState::WaitingForOlderTasks => {
|
||||
"Waiting for tasks enqueued before the network change to finish processing".into()
|
||||
}
|
||||
NetworkTopologyState::ExportingDocuments => "Exporting documents".into(),
|
||||
NetworkTopologyState::ImportingDocuments => {
|
||||
let mut finished_count = 0;
|
||||
let mut first_ongoing = None;
|
||||
let mut ongoing_total_indexes = 0;
|
||||
let mut ongoing_processed_documents = 0;
|
||||
let mut ongoing_missing_documents = 0;
|
||||
let mut ongoing_total_documents = 0;
|
||||
let mut other_ongoing_count = 0;
|
||||
let mut first_waiting = None;
|
||||
let mut other_waiting_count = 0;
|
||||
for (remote_name, in_remote) in &self.in_remotes {
|
||||
match &in_remote.import_state {
|
||||
ImportState::WaitingForInitialTask => {
|
||||
first_waiting = match first_waiting {
|
||||
None => Some(remote_name),
|
||||
first_waiting => {
|
||||
other_waiting_count += 1;
|
||||
first_waiting
|
||||
}
|
||||
};
|
||||
}
|
||||
ImportState::Ongoing { import_index_state, total_indexes } => {
|
||||
first_ongoing = match first_ongoing {
|
||||
None => {
|
||||
ongoing_total_indexes = *total_indexes;
|
||||
Some(remote_name)
|
||||
}
|
||||
first_ongoing => {
|
||||
other_ongoing_count += 1;
|
||||
first_ongoing
|
||||
}
|
||||
};
|
||||
for import_state in import_index_state.values() {
|
||||
match import_state {
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
processed_documents,
|
||||
received_documents,
|
||||
task_keys: _,
|
||||
} => {
|
||||
ongoing_total_documents += total_documents;
|
||||
ongoing_processed_documents += processed_documents;
|
||||
ongoing_missing_documents +=
|
||||
total_documents.saturating_sub(*received_documents);
|
||||
}
|
||||
ImportIndexState::Finished { total_documents } => {
|
||||
ongoing_total_documents += total_documents;
|
||||
ongoing_processed_documents += total_documents;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ImportState::Finished { total_indexes, total_documents } => {
|
||||
finished_count += 1;
|
||||
ongoing_total_indexes = *total_indexes;
|
||||
ongoing_total_documents += *total_documents;
|
||||
ongoing_processed_documents += *total_documents;
|
||||
}
|
||||
}
|
||||
}
|
||||
format!(
|
||||
"Importing documents from {total} remotes{waiting}{ongoing}{finished}",
|
||||
total = self.in_remotes.len(),
|
||||
waiting = if let Some(first_waiting) = first_waiting {
|
||||
format!(
|
||||
", waiting on first task from `{}`{others}",
|
||||
first_waiting,
|
||||
others = if other_waiting_count > 0 {
|
||||
format!(" and {other_waiting_count} other remotes")
|
||||
} else {
|
||||
"".into()
|
||||
}
|
||||
)
|
||||
} else {
|
||||
"".into()
|
||||
},
|
||||
ongoing = if let Some(first_ongoing) = first_ongoing {
|
||||
format!(", awaiting {ongoing_missing_documents} and processed {ongoing_processed_documents} out of {ongoing_total_documents} documents in {ongoing_total_indexes} indexes from `{first_ongoing}`{others}",
|
||||
others=if other_ongoing_count > 0 {format!(" and {other_ongoing_count} other remotes")} else {"".into()})
|
||||
} else {
|
||||
"".into()
|
||||
},
|
||||
finished = if finished_count >= 0 {
|
||||
format!(", {finished_count} remotes finished processing")
|
||||
} else {
|
||||
"".into()
|
||||
}
|
||||
)
|
||||
}
|
||||
NetworkTopologyState::Finished => "Finished".into(),
|
||||
};
|
||||
Details::NetworkTopologyChange { moved_documents: self.stats.moved_documents, message }
|
||||
}
|
||||
|
||||
pub fn merge(&mut self, other: NetworkTopologyChange) {
|
||||
// The topology change has a guarantee of forward progress, so for each field we're going to keep the "most advanced" values.
|
||||
let Self { state, in_name: _, out_name: _, out_remotes: _, in_remotes, stats } = self;
|
||||
|
||||
*state = Ord::max(*state, other.state);
|
||||
*stats = Ord::max(*stats, other.stats);
|
||||
|
||||
for (old_value, new_value) in other.in_remotes.into_values().zip(in_remotes.values_mut()) {
|
||||
new_value.import_state = match (old_value.import_state, std::mem::take(&mut new_value.import_state)) {
|
||||
// waiting for initial task is always older
|
||||
(ImportState::WaitingForInitialTask, newer)
|
||||
| (newer, ImportState::WaitingForInitialTask)
|
||||
|
||||
// finished is always newer
|
||||
| (_, newer @ ImportState::Finished { .. })
|
||||
| (newer @ ImportState::Finished { .. }, _) => newer,
|
||||
(
|
||||
ImportState::Ongoing { import_index_state: left_import, total_indexes: left_total_indexes },
|
||||
ImportState::Ongoing { import_index_state: right_import, total_indexes: right_total_indexes },
|
||||
) => {
|
||||
let import_index_state = left_import.into_iter().merge_join_by(right_import.into_iter(), |(k,_), (x, _)|k.cmp(x)).map(|eob|
|
||||
match eob {
|
||||
EitherOrBoth::Both((name, left), (_, right)) => {
|
||||
let newer = merge_import_index_state(left, right);
|
||||
(name, newer)
|
||||
},
|
||||
EitherOrBoth::Left(import) |
|
||||
EitherOrBoth::Right(import) => import,
|
||||
}
|
||||
).collect();
|
||||
|
||||
ImportState::Ongoing{ import_index_state, total_indexes : u64::max(left_total_indexes, right_total_indexes) }
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_import_index_state(left: ImportIndexState, right: ImportIndexState) -> ImportIndexState {
|
||||
match (left, right) {
|
||||
(_, newer @ ImportIndexState::Finished { .. }) => newer,
|
||||
(newer @ ImportIndexState::Finished { .. }, _) => newer,
|
||||
(
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents: left_total_documents,
|
||||
received_documents: left_received_documents,
|
||||
processed_documents: left_processed_documents,
|
||||
task_keys: mut left_task_keys,
|
||||
},
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents: right_total_documents,
|
||||
received_documents: right_received_documents,
|
||||
processed_documents: right_processed_documents,
|
||||
task_keys: right_task_keys,
|
||||
},
|
||||
) => {
|
||||
let total_documents = u64::max(left_total_documents, right_total_documents);
|
||||
let received_documents = u64::max(left_received_documents, right_received_documents);
|
||||
let processed_documents = u64::max(left_processed_documents, right_processed_documents);
|
||||
left_task_keys.0 |= &right_task_keys.0;
|
||||
let task_keys = left_task_keys;
|
||||
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
received_documents,
|
||||
processed_documents,
|
||||
task_keys,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Eq, PartialOrd, Ord)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum NetworkTopologyState {
|
||||
WaitingForOlderTasks,
|
||||
ExportingDocuments,
|
||||
ImportingDocuments,
|
||||
Finished,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Eq, PartialOrd, Ord)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct NetworkTopologyStats {
|
||||
#[serde(default)]
|
||||
pub moved_documents: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct InRemote {
|
||||
import_state: ImportState,
|
||||
}
|
||||
|
||||
impl InRemote {
|
||||
pub fn new() -> Self {
|
||||
Self { import_state: ImportState::WaitingForInitialTask }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
enum ImportState {
|
||||
/// Initially Meilisearch doesn't know how many documents it should expect from a remote.
/// Every task coming from a remote carries the number of indexes that will be imported,
/// as well as the number of documents to import for the index that the task targets.
#[default]
|
||||
WaitingForInitialTask,
|
||||
Ongoing {
|
||||
import_index_state: BTreeMap<String, ImportIndexState>,
|
||||
total_indexes: u64,
|
||||
},
|
||||
Finished {
|
||||
total_indexes: u64,
|
||||
total_documents: u64,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
enum ImportIndexState {
|
||||
Ongoing {
|
||||
total_documents: u64,
|
||||
received_documents: u64,
|
||||
processed_documents: u64,
|
||||
task_keys: TaskKeys,
|
||||
},
|
||||
Finished {
|
||||
total_documents: u64,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct TaskKeys(pub RoaringBitmap);
|
||||
|
||||
impl Serialize for TaskKeys {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
let TaskKeys(task_keys) = self;
|
||||
let mut bytes = Vec::new();
|
||||
CboRoaringBitmapCodec::serialize_into_vec(task_keys, &mut bytes);
|
||||
let encoded = base64::prelude::BASE64_STANDARD.encode(&bytes);
|
||||
serializer.serialize_str(&encoded)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for TaskKeys {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_str(TaskKeysVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
struct TaskKeysVisitor;
|
||||
impl<'de> serde::de::Visitor<'de> for TaskKeysVisitor {
|
||||
type Value = TaskKeys;
|
||||
|
||||
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
formatter.write_str("a base64 encoded cbo roaring bitmap")
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, encoded: &str) -> Result<Self::Value, E>
|
||||
where
|
||||
E: serde::de::Error,
|
||||
{
|
||||
let decoded = base64::prelude::BASE64_STANDARD.decode(encoded).map_err(|_err| {
|
||||
E::invalid_value(serde::de::Unexpected::Str(encoded), &"a base64 string")
|
||||
})?;
|
||||
self.visit_bytes(&decoded)
|
||||
}
|
||||
|
||||
fn visit_bytes<E>(self, decoded: &[u8]) -> Result<Self::Value, E>
|
||||
where
|
||||
E: serde::de::Error,
|
||||
{
|
||||
let task_keys = CboRoaringBitmapCodec::deserialize_from(decoded).map_err(|_err| {
|
||||
E::invalid_value(serde::de::Unexpected::Bytes(decoded), &"a cbo roaring bitmap")
|
||||
})?;
|
||||
Ok(TaskKeys(task_keys))
|
||||
}
|
||||
}
|
||||
|
||||
pub enum ReceiveTaskError {
|
||||
UnknownRemote(String),
|
||||
DuplicateTask(DocumentId),
|
||||
}
|
||||
|
||||
pub mod headers {
|
||||
use std::borrow::Cow;
|
||||
use std::num::ParseIntError;
|
||||
use std::string::FromUtf8Error;
|
||||
|
||||
use milli::DocumentId;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::tasks::TaskId;
|
||||
|
||||
/// Implement on response types to extract header values
|
||||
pub trait GetHeader: Sized {
|
||||
type Error: std::fmt::Debug + std::fmt::Display;
|
||||
fn get_header(&self, name: &str) -> Result<Option<&str>, Self::Error>;
|
||||
|
||||
fn get_origin_remote(&self) -> Result<Option<Cow<'_, str>>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_ORIGIN_REMOTE_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(Some(urlencoding::decode(encoded).map_err(|inner| DecodeError::UrlDecoding {
|
||||
inner,
|
||||
header: PROXY_ORIGIN_REMOTE_HEADER,
|
||||
})?))
|
||||
}
|
||||
|
||||
fn get_origin_task_uid(&self) -> Result<Option<TaskId>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_ORIGIN_TASK_UID_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_ORIGIN_TASK_UID_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
|
||||
inner,
|
||||
header: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
|
||||
fn get_origin_network_version(&self) -> Result<Option<Uuid>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_ORIGIN_NETWORK_VERSION_HEADER)?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_ORIGIN_NETWORK_VERSION_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseUuid {
|
||||
inner,
|
||||
header: PROXY_ORIGIN_NETWORK_VERSION_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
|
||||
fn get_import_remote(&self) -> Result<Option<Cow<'_, str>>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_REMOTE_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(Some(urlencoding::decode(encoded).map_err(|inner| DecodeError::UrlDecoding {
|
||||
inner,
|
||||
header: PROXY_IMPORT_REMOTE_HEADER,
|
||||
})?))
|
||||
}
|
||||
|
||||
fn get_import_index_count(&self) -> Result<Option<u64>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_INDEX_COUNT_HEADER)?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_INDEX_COUNT_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
|
||||
inner,
|
||||
header: PROXY_IMPORT_INDEX_COUNT_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
|
||||
fn get_import_index(&self) -> Result<Option<Cow<'_, str>>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_INDEX_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(Some(urlencoding::decode(encoded).map_err(|inner| DecodeError::UrlDecoding {
|
||||
inner,
|
||||
header: PROXY_IMPORT_INDEX_HEADER,
|
||||
})?))
|
||||
}
|
||||
|
||||
fn get_import_task_key(&self) -> Result<Option<DocumentId>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_TASK_KEY_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_TASK_KEY_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
|
||||
inner,
|
||||
header: PROXY_IMPORT_TASK_KEY_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
|
||||
fn get_import_docs(&self) -> Result<Option<u64>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_DOCS_HEADER)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_DOCS_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
|
||||
inner,
|
||||
header: PROXY_IMPORT_DOCS_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
|
||||
fn get_import_index_docs(&self) -> Result<Option<u64>, DecodeError<Self>> {
|
||||
let Some(encoded) = get_header_and_legacy(self, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER)?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let decoded = urlencoding::decode(encoded).map_err(|inner| {
|
||||
DecodeError::UrlDecoding { inner, header: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER }
|
||||
})?;
|
||||
|
||||
let parsed = decoded.parse().map_err(|inner| DecodeError::ParseInt {
|
||||
inner,
|
||||
header: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
|
||||
})?;
|
||||
|
||||
Ok(Some(parsed))
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement on query types to set header values
|
||||
pub trait SetHeader: Sized {
|
||||
fn set_header(self, name: &str, value: &str) -> Self;
|
||||
|
||||
fn set_origin_remote(self, value: &str) -> Self {
|
||||
let encoded = urlencoding::encode(value);
|
||||
set_header_and_legacy(self, PROXY_ORIGIN_REMOTE_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_origin_task_uid(self, value: TaskId) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_ORIGIN_TASK_UID_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_origin_network_version(self, value: Uuid) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_ORIGIN_NETWORK_VERSION_HEADER, &encoded)
|
||||
}
|
||||
fn set_import_remote(self, value: &str) -> Self {
|
||||
let encoded = urlencoding::encode(value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_REMOTE_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_import_index_count(self, value: u64) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_INDEX_COUNT_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_import_index(self, value: &str) -> Self {
|
||||
let encoded = urlencoding::encode(value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_INDEX_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_import_task_key(self, value: DocumentId) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_TASK_KEY_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_import_docs(self, value: u64) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_DOCS_HEADER, &encoded)
|
||||
}
|
||||
|
||||
fn set_import_index_docs(self, value: u64) -> Self {
|
||||
let value = value.to_string();
|
||||
let encoded = urlencoding::encode(&value);
|
||||
set_header_and_legacy(self, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER, &encoded)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum DecodeError<T: GetHeader> {
|
||||
#[error("while getting header: {inner}")]
|
||||
InResponse { inner: T::Error, header: &'static str },
|
||||
#[error("while url-decoding: {inner}")]
|
||||
UrlDecoding { inner: FromUtf8Error, header: &'static str },
|
||||
#[error("while parsing as an integer: {inner}")]
|
||||
ParseInt { inner: ParseIntError, header: &'static str },
|
||||
#[error("while parsing as a UUID: {inner}")]
|
||||
ParseUuid { inner: uuid::Error, header: &'static str },
|
||||
}
|
||||
|
||||
impl<T: GetHeader> DecodeError<T> {
|
||||
pub fn header(&self) -> &'static str {
|
||||
match self {
|
||||
DecodeError::InResponse { inner: _, header }
|
||||
| DecodeError::UrlDecoding { inner: _, header }
|
||||
| DecodeError::ParseInt { inner: _, header }
|
||||
| DecodeError::ParseUuid { inner: _, header } => header,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "X-Meili-Proxy-Origin-Remote";
|
||||
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "X-Meili-Proxy-Origin-TaskUid";
|
||||
pub const PROXY_ORIGIN_NETWORK_VERSION_HEADER: &str = "X-Meili-Proxy-Origin-Network-Version";
|
||||
pub const PROXY_IMPORT_REMOTE_HEADER: &str = "X-Meili-Proxy-Import-Remote";
|
||||
pub const PROXY_IMPORT_INDEX_COUNT_HEADER: &str = "X-Meili-Proxy-Import-Index-Count";
|
||||
pub const PROXY_IMPORT_INDEX_HEADER: &str = "X-Meili-Proxy-Import-Index";
|
||||
pub const PROXY_IMPORT_TASK_KEY_HEADER: &str = "X-Meili-Proxy-Import-Task-Key";
|
||||
pub const PROXY_IMPORT_DOCS_HEADER: &str = "X-Meili-Proxy-Import-Docs";
|
||||
pub const PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER: &str = "X-Meili-Proxy-Import-Total-Index-Docs";
|
||||
|
||||
fn get_header_and_legacy<'a, T: GetHeader>(
|
||||
t: &'a T,
|
||||
header: &'static str,
|
||||
) -> Result<Option<&'a str>, DecodeError<T>> {
|
||||
Ok(Some(
|
||||
if let Some(encoded) =
|
||||
t.get_header(header).map_err(|inner| DecodeError::InResponse { inner, header })?
|
||||
{
|
||||
encoded
|
||||
} else {
|
||||
let header = header.strip_prefix("X-").unwrap();
|
||||
let Some(encoded) = t
|
||||
.get_header(header)
|
||||
.map_err(|inner| DecodeError::InResponse { inner, header })?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
encoded
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
fn set_header_and_legacy<T: SetHeader>(t: T, name: &'static str, value: &str) -> T {
|
||||
let t = t.set_header(name, value);
|
||||
let name = name.strip_prefix("X-").unwrap();
|
||||
t.set_header(name, value)
|
||||
}
|
||||
}
|
||||
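The `GetHeader`/`SetHeader` helpers above always try the `X-Meili-...` name first and fall back to the same name without the `X-` prefix. A minimal sketch of implementing `GetHeader` for a toy response type (the `FakeResponse` type and the import path are assumptions made for illustration):

use std::collections::BTreeMap;
use std::convert::Infallible;

use meilisearch_types::tasks::network::headers::GetHeader; // assumed path

#[derive(Debug)]
struct FakeResponse(BTreeMap<String, String>);

impl GetHeader for FakeResponse {
    type Error = Infallible;

    fn get_header(&self, name: &str) -> Result<Option<&str>, Self::Error> {
        Ok(self.0.get(name).map(String::as_str))
    }
}

fn main() {
    // Only the legacy name (without the `X-` prefix) is present in this response...
    let mut headers = BTreeMap::new();
    headers.insert("Meili-Proxy-Origin-TaskUid".to_string(), "42".to_string());
    let response = FakeResponse(headers);
    // ...but the accessor still finds it through the legacy fallback.
    assert_eq!(response.get_origin_task_uid().unwrap(), Some(42));
}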
@@ -0,0 +1,52 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use milli::DocumentId;
|
||||
|
||||
use crate::network::Remote;
|
||||
use crate::tasks::network::{ImportState, InRemote, NetworkTopologyChange, ReceiveTaskError};
|
||||
|
||||
impl NetworkTopologyChange {
|
||||
pub fn export_to_process(&self) -> Option<(&BTreeMap<String, Remote>, &str)> {
|
||||
None
|
||||
}
|
||||
|
||||
pub fn set_moved(&mut self, _moved_documents: u64) {}
|
||||
|
||||
pub fn update_state(&mut self) {}
|
||||
|
||||
pub fn receive_remote_task(
|
||||
&mut self,
|
||||
_remote_name: &str,
|
||||
_index_name: Option<&str>,
|
||||
_task_key: Option<DocumentId>,
|
||||
_document_count: u64,
|
||||
_total_indexes: u64,
|
||||
_total_index_documents: u64,
|
||||
) -> Result<(), ReceiveTaskError> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn process_remote_tasks(
|
||||
&mut self,
|
||||
_remote_name: &str,
|
||||
_index_name: &str,
|
||||
_document_count: u64,
|
||||
) {
|
||||
}
|
||||
|
||||
pub fn is_import_finished(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl InRemote {
|
||||
pub fn is_finished(&self) -> bool {
|
||||
matches!(self.import_state, ImportState::Finished { .. })
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for InRemote {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
239
crates/meilisearch-types/src/tasks/network/enterprise_edition.rs
Normal file
@@ -0,0 +1,239 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use milli::DocumentId;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::TaskKeys;
|
||||
use crate::network::Remote;
|
||||
use crate::tasks::network::{
|
||||
ImportIndexState, ImportState, InRemote, NetworkTopologyChange, NetworkTopologyState,
|
||||
ReceiveTaskError,
|
||||
};
|
||||
|
||||
impl NetworkTopologyChange {
|
||||
pub fn export_to_process(&self) -> Option<(&BTreeMap<String, Remote>, &str)> {
|
||||
if self.state != NetworkTopologyState::ExportingDocuments {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.out_remotes.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let out_name = self.out_name()?;
|
||||
Some((&self.out_remotes, out_name))
|
||||
}
|
||||
|
||||
pub fn set_moved(&mut self, moved_documents: u64) {
|
||||
self.stats.moved_documents = moved_documents;
|
||||
}
|
||||
|
||||
/// Compute the next state from the current state of the task.
|
||||
pub fn update_state(&mut self) {
|
||||
self.state = match self.state {
|
||||
NetworkTopologyState::WaitingForOlderTasks => {
|
||||
// no more older tasks, so finished waiting
|
||||
NetworkTopologyState::ExportingDocuments
|
||||
}
|
||||
NetworkTopologyState::ExportingDocuments => {
|
||||
// processed all exported documents
|
||||
if self.is_import_finished() {
|
||||
NetworkTopologyState::Finished
|
||||
} else {
|
||||
NetworkTopologyState::ImportingDocuments
|
||||
}
|
||||
}
|
||||
NetworkTopologyState::ImportingDocuments => {
|
||||
if self.is_import_finished() {
|
||||
NetworkTopologyState::Finished
|
||||
} else {
|
||||
NetworkTopologyState::ImportingDocuments
|
||||
}
|
||||
}
|
||||
NetworkTopologyState::Finished => NetworkTopologyState::Finished,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn receive_remote_task(
|
||||
&mut self,
|
||||
remote_name: &str,
|
||||
index_name: Option<&str>,
|
||||
task_key: Option<DocumentId>,
|
||||
document_count: u64,
|
||||
total_indexes: u64,
|
||||
total_index_documents: u64,
|
||||
) -> Result<(), ReceiveTaskError> {
|
||||
let remote = self
|
||||
.in_remotes
|
||||
.get_mut(remote_name)
|
||||
.ok_or_else(|| ReceiveTaskError::UnknownRemote(remote_name.to_string()))?;
|
||||
remote.import_state = match std::mem::take(&mut remote.import_state) {
|
||||
ImportState::WaitingForInitialTask => {
|
||||
if total_indexes == 0 {
|
||||
ImportState::Finished { total_indexes, total_documents: 0 }
|
||||
} else {
|
||||
let mut task_keys = RoaringBitmap::new();
|
||||
if let Some(index_name) = index_name {
|
||||
if let Some(task_key) = task_key {
|
||||
task_keys.insert(task_key);
|
||||
}
|
||||
let mut import_index_state = BTreeMap::new();
|
||||
import_index_state.insert(
|
||||
index_name.to_owned(),
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents: total_index_documents,
|
||||
received_documents: document_count,
|
||||
task_keys: TaskKeys(task_keys),
|
||||
processed_documents: 0,
|
||||
},
|
||||
);
|
||||
ImportState::Ongoing { import_index_state, total_indexes }
|
||||
} else {
|
||||
ImportState::WaitingForInitialTask
|
||||
}
|
||||
}
|
||||
}
|
||||
ImportState::Ongoing { mut import_index_state, total_indexes } => {
|
||||
if let Some(index_name) = index_name {
|
||||
if let Some((index_name, mut index_state)) =
|
||||
import_index_state.remove_entry(index_name)
|
||||
{
|
||||
index_state = match index_state {
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
received_documents: previously_received,
|
||||
processed_documents,
|
||||
mut task_keys,
|
||||
} => {
|
||||
if let Some(task_key) = task_key {
|
||||
if !task_keys.0.insert(task_key) {
|
||||
return Err(ReceiveTaskError::DuplicateTask(task_key));
|
||||
}
|
||||
}
|
||||
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
received_documents: previously_received + document_count,
|
||||
processed_documents,
|
||||
task_keys,
|
||||
}
|
||||
}
|
||||
ImportIndexState::Finished { total_documents } => {
|
||||
ImportIndexState::Finished { total_documents }
|
||||
}
|
||||
};
|
||||
import_index_state.insert(index_name, index_state);
|
||||
} else {
|
||||
let mut task_keys = RoaringBitmap::new();
|
||||
if let Some(task_key) = task_key {
|
||||
task_keys.insert(task_key);
|
||||
}
|
||||
let state = ImportIndexState::Ongoing {
|
||||
total_documents: total_index_documents,
|
||||
received_documents: document_count,
|
||||
processed_documents: 0,
|
||||
task_keys: TaskKeys(task_keys),
|
||||
};
|
||||
import_index_state.insert(index_name.to_string(), state);
|
||||
}
|
||||
ImportState::Ongoing { import_index_state, total_indexes }
|
||||
} else {
|
||||
ImportState::Ongoing { import_index_state, total_indexes }
|
||||
}
|
||||
}
|
||||
ImportState::Finished { total_indexes, total_documents } => {
|
||||
ImportState::Finished { total_indexes, total_documents }
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn process_remote_tasks(
|
||||
&mut self,
|
||||
remote_name: &str,
|
||||
index_name: &str,
|
||||
document_count: u64,
|
||||
) {
|
||||
let remote = self
|
||||
.in_remotes
|
||||
.get_mut(remote_name)
|
||||
.expect("process_remote_tasks called on a remote that is not in `in_remotes`");
|
||||
remote.import_state = match std::mem::take(&mut remote.import_state) {
|
||||
ImportState::WaitingForInitialTask => panic!("a remote task was processed before any was received"),
|
||||
ImportState::Ongoing { mut import_index_state, total_indexes } => {
|
||||
let (index_name, mut index_state) =
|
||||
import_index_state.remove_entry(index_name).unwrap();
|
||||
index_state = match index_state {
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
received_documents,
|
||||
processed_documents: previously_processed,
|
||||
task_keys,
|
||||
} => {
|
||||
let newly_processed_documents = previously_processed + document_count;
|
||||
if newly_processed_documents >= total_documents {
|
||||
ImportIndexState::Finished { total_documents }
|
||||
} else {
|
||||
ImportIndexState::Ongoing {
|
||||
total_documents,
|
||||
received_documents,
|
||||
processed_documents: newly_processed_documents,
|
||||
task_keys,
|
||||
}
|
||||
}
|
||||
}
|
||||
ImportIndexState::Finished { total_documents } => {
|
||||
ImportIndexState::Finished { total_documents }
|
||||
}
|
||||
};
|
||||
import_index_state.insert(index_name, index_state);
|
||||
if import_index_state.len() as u64 == total_indexes
|
||||
&& import_index_state.values().all(|index| index.is_finished())
|
||||
{
|
||||
let total_documents =
|
||||
import_index_state.values().map(|index| index.total_documents()).sum();
|
||||
ImportState::Finished { total_indexes, total_documents }
|
||||
} else {
|
||||
ImportState::Ongoing { import_index_state, total_indexes }
|
||||
}
|
||||
}
|
||||
ImportState::Finished { total_indexes, total_documents } => {
|
||||
ImportState::Finished { total_indexes, total_documents }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_import_finished(&self) -> bool {
|
||||
self.in_remotes.values().all(|remote| remote.is_finished())
|
||||
}
|
||||
}
|
||||
|
||||
impl InRemote {
|
||||
pub fn is_finished(&self) -> bool {
|
||||
matches!(self.import_state, ImportState::Finished { .. })
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for InRemote {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl ImportIndexState {
|
||||
pub fn is_finished(&self) -> bool {
|
||||
matches!(self, ImportIndexState::Finished { .. })
|
||||
}
|
||||
|
||||
fn total_documents(&self) -> u64 {
|
||||
match *self {
|
||||
ImportIndexState::Ongoing { total_documents, .. }
|
||||
| ImportIndexState::Finished { total_documents } => total_documents,
|
||||
}
|
||||
}
|
||||
}
|
||||
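Putting the enterprise state machine together: a change built from an old and a new `Network` starts in `WaitingForOlderTasks`, and `update_state` advances it once older tasks are drained. A sketch assuming the `enterprise` feature is enabled and the crate paths shown in the new files above:

use std::collections::BTreeMap;

use meilisearch_types::network::{Network, Remote}; // assumed paths
use meilisearch_types::tasks::network::{NetworkTopologyChange, NetworkTopologyState};

fn main() {
    let old_network = Network {
        local: Some("ms-0".to_string()),
        remotes: BTreeMap::new(),
        leader: Some("ms-0".to_string()),
        version: Default::default(),
    };
    let mut new_remotes = BTreeMap::new();
    new_remotes.insert(
        "ms-1".to_string(),
        Remote { url: "http://ms-1:7700".to_string(), search_api_key: None, write_api_key: None },
    );
    let new_network = Network {
        local: Some("ms-0".to_string()),
        remotes: new_remotes,
        leader: Some("ms-0".to_string()),
        version: Default::default(),
    };

    let mut change = NetworkTopologyChange::new(old_network, new_network);
    // The task starts by draining tasks enqueued for the previous network version.
    assert_eq!(change.state(), NetworkTopologyState::WaitingForOlderTasks);
    // Once those are done, the scheduler advances it to the export phase.
    change.update_state();
    assert_eq!(change.state(), NetworkTopologyState::ExportingDocuments);
}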
@@ -14,91 +14,91 @@ default-run = "meilisearch"
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.7.1"
|
||||
actix-http = { version = "3.11.0", default-features = false, features = [
|
||||
actix-http = { version = "3.11.2", default-features = false, features = [
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"rustls-0_23",
|
||||
] }
|
||||
actix-utils = "3.0.1"
|
||||
actix-web = { version = "4.11.0", default-features = false, features = [
|
||||
actix-web = { version = "4.12.0", default-features = false, features = [
|
||||
"macros",
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"cookies",
|
||||
"rustls-0_23",
|
||||
] }
|
||||
anyhow = { version = "1.0.98", features = ["backtrace"] }
|
||||
bstr = "1.12.0"
|
||||
anyhow = { version = "1.0.100", features = ["backtrace"] }
|
||||
bstr = "1.12.1"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
bytes = "1.10.1"
|
||||
bumpalo = "3.18.1"
|
||||
clap = { version = "4.5.40", features = ["derive", "env"] }
|
||||
bytes = "1.11.0"
|
||||
bumpalo = "3.19.0"
|
||||
clap = { version = "4.5.52", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.15"
|
||||
deserr = { version = "0.6.3", features = ["actix-web"] }
|
||||
deserr = { version = "0.6.4", features = ["actix-web"] }
|
||||
dump = { path = "../dump" }
|
||||
either = "1.15.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.2"
|
||||
flate2 = "1.1.5"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3.31"
|
||||
futures-util = "0.3.31"
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
indexmap = { version = "2.9.0", features = ["serde"] }
|
||||
is-terminal = "0.4.16"
|
||||
indexmap = { version = "2.12.0", features = ["serde"] }
|
||||
is-terminal = "0.4.17"
|
||||
itertools = "0.14.0"
|
||||
jsonwebtoken = "9.3.1"
|
||||
lazy_static = "1.5.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.7"
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
memmap2 = "0.9.9"
|
||||
mimalloc = { version = "0.1.48", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.17.0"
|
||||
obkv = "0.3.0"
|
||||
once_cell = "1.21.3"
|
||||
ordered-float = "5.0.0"
|
||||
parking_lot = "0.12.4"
|
||||
ordered-float = "5.1.0"
|
||||
parking_lot = "0.12.5"
|
||||
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.16"
|
||||
platform-dirs = "0.3.0"
|
||||
prometheus = { version = "0.14.0", features = ["process"] }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.10.0"
|
||||
regex = "1.11.1"
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
rayon = "1.11.0"
|
||||
regex = "1.12.2"
|
||||
reqwest = { version = "0.12.24", features = [
|
||||
"rustls-tls",
|
||||
"json",
|
||||
], default-features = false }
|
||||
rustls = { version = "0.23.28", features = ["ring"], default-features = false }
|
||||
rustls-pki-types = { version = "1.12.0", features = ["alloc"] }
|
||||
rustls = { version = "0.23.35", features = ["ring"], default-features = false }
|
||||
rustls-pki-types = { version = "1.13.0", features = ["alloc"] }
|
||||
rustls-pemfile = "2.2.0"
|
||||
segment = { version = "0.2.6" }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
sha2 = "0.10.9"
|
||||
siphasher = "1.0.1"
|
||||
slice-group-by = "0.3.1"
|
||||
static-files = { version = "0.2.5", optional = true }
|
||||
sysinfo = "0.35.2"
|
||||
static-files = { version = "0.3.1", optional = true }
|
||||
sysinfo = "0.37.2"
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = { version = "1.45.1", features = ["full"] }
|
||||
toml = "0.8.23"
|
||||
uuid = { version = "1.18.0", features = ["serde", "v4", "v7"] }
|
||||
tokio = { version = "1.48.0", features = ["full"] }
|
||||
toml = "0.9.8"
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4", "v7"] }
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.4.1"
|
||||
url = { version = "2.5.4", features = ["serde"] }
|
||||
url = { version = "2.5.7", features = ["serde"] }
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = { version = "0.3.20", features = ["json"] }
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
tracing-actix-web = "0.7.18"
|
||||
tracing-actix-web = "0.7.19"
|
||||
build-info = { version = "1.7.0", path = "../build-info" }
|
||||
roaring = "0.10.12"
|
||||
mopa-maintained = "0.2.3"
|
||||
@@ -114,35 +114,35 @@ utoipa = { version = "5.4.0", features = [
|
||||
utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
|
||||
async-openai = { git = "https://github.com/meilisearch/async-openai", branch = "better-error-handling" }
|
||||
secrecy = "0.10.3"
|
||||
actix-web-lab = { version = "0.24.1", default-features = false }
|
||||
actix-web-lab = { version = "0.24.3", default-features = false }
|
||||
urlencoding = "2.1.3"
|
||||
backoff = { version = "0.4.0", features = ["tokio"] }
|
||||
|
||||
humantime = { version = "2.3.0", default-features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.10.0"
|
||||
brotli = "8.0.1"
|
||||
actix-rt = "2.11.0"
|
||||
brotli = "8.0.2"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["redactions"] }
|
||||
manifest-dir-macros = "0.1.18"
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
temp-env = "0.3.6"
|
||||
wiremock = "0.6.3"
|
||||
wiremock = "0.6.5"
|
||||
yaup = "0.3.1"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = { version = "1.0.98", optional = true }
|
||||
cargo_toml = { version = "0.22.1", optional = true }
|
||||
anyhow = { version = "1.0.100", optional = true }
|
||||
cargo_toml = { version = "0.22.3", optional = true }
|
||||
hex = { version = "0.4.3", optional = true }
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
reqwest = { version = "0.12.24", features = [
|
||||
"blocking",
|
||||
"rustls-tls",
|
||||
], default-features = false, optional = true }
|
||||
sha-1 = { version = "0.10.1", optional = true }
|
||||
static-files = { version = "0.2.5", optional = true }
|
||||
tempfile = { version = "3.20.0", optional = true }
|
||||
zip = { version = "4.1.0", optional = true }
|
||||
static-files = { version = "0.3.1", optional = true }
|
||||
tempfile = { version = "3.23.0", optional = true }
|
||||
zip = { version = "6.0.0", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
@@ -160,6 +160,7 @@ mini-dashboard = [
|
||||
]
|
||||
chinese = ["meilisearch-types/chinese"]
|
||||
chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
|
||||
enterprise = ["meilisearch-types/enterprise", "index-scheduler/enterprise"]
|
||||
hebrew = ["meilisearch-types/hebrew"]
|
||||
japanese = ["meilisearch-types/japanese"]
|
||||
korean = ["meilisearch-types/korean"]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::any::TypeId;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
@@ -195,7 +195,7 @@ struct Infos {
|
||||
experimental_enable_logs_route: bool,
|
||||
experimental_reduce_indexing_memory_usage: bool,
|
||||
experimental_max_number_of_batched_tasks: usize,
|
||||
experimental_limit_batched_tasks_total_size: u64,
|
||||
experimental_limit_batched_tasks_total_size: Option<u64>,
|
||||
experimental_network: bool,
|
||||
experimental_multimodal: bool,
|
||||
experimental_chat_completions: bool,
|
||||
@@ -344,14 +344,14 @@ impl Infos {
|
||||
experimental_no_edition_2024_for_dumps,
|
||||
experimental_vector_store_setting: vector_store_setting,
|
||||
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
|
||||
db_path: db_path != PathBuf::from("./data.ms"),
|
||||
db_path: db_path != Path::new("./data.ms"),
|
||||
import_dump: import_dump.is_some(),
|
||||
dump_dir: dump_dir != PathBuf::from("dumps/"),
|
||||
dump_dir: dump_dir != Path::new("dumps/"),
|
||||
ignore_missing_dump,
|
||||
ignore_dump_if_db_exists,
|
||||
import_snapshot: import_snapshot.is_some(),
|
||||
schedule_snapshot,
|
||||
snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
|
||||
snapshot_dir: snapshot_dir != Path::new("snapshots/"),
|
||||
uses_s3_snapshots: s3_snapshot_options.is_some(),
|
||||
ignore_missing_snapshot,
|
||||
ignore_snapshot_if_db_exists,
|
||||
@@ -359,7 +359,7 @@ impl Infos {
|
||||
http_payload_size_limit,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
experimental_limit_batched_tasks_total_size:
|
||||
experimental_limit_batched_tasks_total_size.into(),
|
||||
experimental_limit_batched_tasks_total_size.map(|size| size.as_u64()),
|
||||
task_queue_webhook: task_webhook_url.is_some(),
|
||||
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
|
||||
log_level: log_level.to_string(),
|
||||
|
||||
@@ -6,10 +6,14 @@ use meilisearch_types::error::{Code, ErrorCode, ResponseError};
|
||||
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::OrderBy;
|
||||
use meilisearch_types::tasks::network::headers::{
|
||||
PROXY_IMPORT_DOCS_HEADER, PROXY_IMPORT_INDEX_COUNT_HEADER, PROXY_IMPORT_INDEX_HEADER,
|
||||
PROXY_IMPORT_REMOTE_HEADER, PROXY_IMPORT_TASK_KEY_HEADER, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
|
||||
PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
};
|
||||
use serde_json::Value;
|
||||
use tokio::task::JoinError;
|
||||
|
||||
use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
@@ -93,8 +97,58 @@ pub enum MeilisearchHttpError {
|
||||
} else { PROXY_ORIGIN_TASK_UID_HEADER }
|
||||
)]
|
||||
InconsistentOriginHeaders { is_remote_missing: bool },
|
||||
#[error("Invalid value for header {header_name}: {msg}")]
|
||||
#[error("Inconsistent `Import` headers: {remote}: {remote_status}, {index}: {index_status}, {docs}: {docs_status}.\n - Hint: either all three headers should be provided, or none of them",
|
||||
remote = PROXY_IMPORT_REMOTE_HEADER,
|
||||
remote_status = if *is_remote_missing { "missing" } else{ "provided" },
|
||||
index = PROXY_IMPORT_INDEX_HEADER,
|
||||
index_status = if *is_index_missing { "missing" } else { "provided" },
|
||||
docs = PROXY_IMPORT_DOCS_HEADER,
|
||||
docs_status = if *is_docs_missing { "missing" } else { "provided" }
|
||||
)]
|
||||
InconsistentImportHeaders {
|
||||
is_remote_missing: bool,
|
||||
is_index_missing: bool,
|
||||
is_docs_missing: bool,
|
||||
},
|
||||
#[error("Inconsistent `Import-Metadata` headers: {index_count}: {index_count_status}, {task_key}: {task_key_status}, {total_index_documents}: {total_index_documents_status}.\n - Hint: either all three headers should be provided, or none of them",
|
||||
index_count = PROXY_IMPORT_INDEX_COUNT_HEADER,
|
||||
index_count_status = if *is_index_count_missing { "missing" } else { "provided"},
|
||||
task_key = PROXY_IMPORT_TASK_KEY_HEADER,
|
||||
task_key_status = if *is_task_key_missing { "missing" } else { "provided"},
|
||||
total_index_documents = PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
|
||||
total_index_documents_status = if *is_total_index_documents_missing { "missing" } else { "provided"},
|
||||
)]
|
||||
InconsistentImportMetadataHeaders {
|
||||
is_index_count_missing: bool,
|
||||
is_task_key_missing: bool,
|
||||
is_total_index_documents_missing: bool,
|
||||
},
|
||||
|
||||
#[error(
|
||||
"Inconsistent task network headers: origin headers: {origin_status}, import headers: {import_status}, import metadata: {import_metadata_status}",
|
||||
origin_status = if *is_missing_origin { "missing"} else { "present" },
|
||||
import_status = if *is_missing_import { "missing"} else { "present" },
|
||||
import_metadata_status = if *is_missing_import_metadata { "missing"} else { "present" })]
|
||||
InconsistentTaskNetworkHeaders {
|
||||
is_missing_origin: bool,
|
||||
is_missing_import: bool,
|
||||
is_missing_import_metadata: bool,
|
||||
},
|
||||
#[error("Invalid value for header `{header_name}`: {msg}")]
|
||||
InvalidHeaderValue { header_name: &'static str, msg: String },
|
||||
#[error("This remote is not the leader of the network.\n - Note: only the leader `{leader}` can receive new tasks.")]
|
||||
NotLeader { leader: String },
|
||||
#[error("Unexpected `previousRemotes` in network call.\n - Note: `previousRemote` is reserved for internal use.")]
|
||||
UnexpectedNetworkPreviousRemotes,
|
||||
#[error("The network version in request is too old.\n - Received: {received}\n - Expected at least: {expected_at_least}")]
|
||||
NetworkVersionTooOld { received: Uuid, expected_at_least: Uuid },
|
||||
#[error("Remote `{remote}` encountered an error: {error}")]
|
||||
RemoteIndexScheduler { remote: String, error: index_scheduler::Error },
|
||||
#[error("{if_remote}Already has a pending network task with uid {task_uid}.\n - Note: No network task can be registered while any previous network task is not done processing.\n - Hint: Wait for task {task_uid} to complete or cancel it.",
|
||||
if_remote=if let Some(remote) = remote {
|
||||
format!("Remote `{remote}` encountered an error: ")
|
||||
} else {"".into()} )]
|
||||
UnprocessedNetworkTask { remote: Option<String>, task_uid: meilisearch_types::tasks::TaskId },
|
||||
}
|
||||
|
||||
impl MeilisearchHttpError {
|
||||
@@ -122,6 +176,7 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
|
||||
MeilisearchHttpError::HeedError(_) => Code::Internal,
|
||||
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
|
||||
MeilisearchHttpError::RemoteIndexScheduler { error, .. } => error.error_code(),
|
||||
MeilisearchHttpError::Milli { error, .. } => error.error_code(),
|
||||
MeilisearchHttpError::Payload(e) => e.error_code(),
|
||||
MeilisearchHttpError::FileStore(_) => Code::Internal,
|
||||
@@ -142,10 +197,19 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::PersonalizationInFederatedQuery(_) => {
|
||||
Code::InvalidMultiSearchQueryPersonalization
|
||||
}
|
||||
MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
|
||||
MeilisearchHttpError::InconsistentOriginHeaders { .. }
|
||||
| MeilisearchHttpError::InconsistentImportHeaders { .. }
|
||||
| MeilisearchHttpError::InconsistentImportMetadataHeaders { .. }
|
||||
| MeilisearchHttpError::InconsistentTaskNetworkHeaders { .. } => {
|
||||
Code::InconsistentDocumentChangeHeaders
|
||||
}
|
||||
MeilisearchHttpError::InvalidHeaderValue { .. } => Code::InvalidHeaderValue,
|
||||
MeilisearchHttpError::NotLeader { .. } => Code::NotLeader,
|
||||
MeilisearchHttpError::UnexpectedNetworkPreviousRemotes => {
|
||||
Code::UnexpectedNetworkPreviousRemotes
|
||||
}
|
||||
MeilisearchHttpError::NetworkVersionTooOld { .. } => Code::NetworkVersionTooOld,
|
||||
MeilisearchHttpError::UnprocessedNetworkTask { .. } => Code::UnprocessedNetworkTask,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -169,6 +233,14 @@ impl From<aweb::error::PayloadError> for MeilisearchHttpError {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: meilisearch_types::tasks::network::headers::GetHeader>
|
||||
From<meilisearch_types::tasks::network::headers::DecodeError<T>> for MeilisearchHttpError
|
||||
{
|
||||
fn from(value: meilisearch_types::tasks::network::headers::DecodeError<T>) -> Self {
|
||||
Self::InvalidHeaderValue { header_name: value.header(), msg: value.to_string() }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ActixPayloadError {
|
||||
#[error("The provided payload is incomplete and cannot be parsed")]
|
||||
|
||||
@@ -12,6 +12,7 @@ pub mod option;
|
||||
#[cfg(test)]
|
||||
mod option_test;
|
||||
pub mod personalization;
|
||||
pub mod proxy;
|
||||
pub mod routes;
|
||||
pub mod search;
|
||||
pub mod search_queue;
|
||||
@@ -229,8 +230,19 @@ pub fn setup_meilisearch(
|
||||
autobatching_enabled: true,
|
||||
cleanup_enabled: !opt.experimental_replication_parameters,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
export_default_payload_size_bytes: almost_as_big_as(opt.http_payload_size_limit),
|
||||
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(),
|
||||
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.map_or_else(
|
||||
|| {
|
||||
opt.indexer_options
|
||||
.max_indexing_memory
|
||||
// By default, we use half of the available memory to determine the size of batched tasks
|
||||
.map_or(u64::MAX, |mem| mem.as_u64() / 2)
|
||||
// And never exceed 10 GiB when we infer the limit
|
||||
.min(10 * 1024 * 1024 * 1024)
|
||||
},
|
||||
|size| size.as_u64(),
|
||||
),
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features: opt.to_instance_features(),
|
||||
@@ -329,6 +341,13 @@ pub fn setup_meilisearch(
|
||||
Ok((index_scheduler, auth_controller))
|
||||
}
|
||||
|
||||
/// Returns the input - 1MiB, or at least 20MiB
|
||||
fn almost_as_big_as(input: byte_unit::Byte) -> byte_unit::Byte {
|
||||
let with_margin = input.subtract(byte_unit::Byte::MEBIBYTE);
|
||||
let at_least = byte_unit::Byte::MEBIBYTE.multiply(20).unwrap();
|
||||
with_margin.unwrap_or(at_least).max(at_least)
|
||||
}
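// Not part of the diff: an illustration of the clamping above, assuming the byte_unit
// `Byte` API already used in this function. 100 MiB keeps its 1 MiB margin; anything at
// or below 21 MiB is floored at the 20 MiB minimum.
fn almost_as_big_as_examples() {
    let large = byte_unit::Byte::from_u64(100 * 1024 * 1024);
    assert_eq!(almost_as_big_as(large).as_u64(), 99 * 1024 * 1024);
    let small = byte_unit::Byte::from_u64(5 * 1024 * 1024);
    assert_eq!(almost_as_big_as(small).as_u64(), 20 * 1024 * 1024);
}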
|
||||
|
||||
/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
|
||||
fn open_or_create_database_unchecked(
|
||||
opt: &Opt,
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use lazy_static::lazy_static;
|
||||
use prometheus::{
|
||||
opts, register_gauge, register_histogram_vec, register_int_counter_vec, register_int_gauge,
|
||||
register_int_gauge_vec, Gauge, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
|
||||
opts, register_gauge, register_gauge_vec, register_histogram_vec, register_int_counter_vec,
|
||||
register_int_gauge, register_int_gauge_vec, Gauge, GaugeVec, HistogramVec, IntCounterVec,
|
||||
IntGauge, IntGaugeVec,
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
@@ -73,6 +74,20 @@ lazy_static! {
|
||||
&["kind", "value"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_BATCH_RUNNING_PROGRESS_TRACE: GaugeVec = register_gauge_vec!(
|
||||
opts!("meilisearch_batch_running_progress_trace", "The currently running progress trace"),
|
||||
&["batch_uid", "step_name"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_LAST_FINISHED_BATCHES_PROGRESS_TRACE_MS: IntGaugeVec =
|
||||
register_int_gauge_vec!(
|
||||
opts!(
|
||||
"meilisearch_last_finished_batches_progress_trace_ms",
|
||||
"The last few batches progress trace in milliseconds"
|
||||
),
|
||||
&["batch_uid", "step_name"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_LAST_UPDATE: IntGauge =
|
||||
register_int_gauge!(opts!("meilisearch_last_update", "Meilisearch Last Update"))
|
||||
.expect("Can't create a metric");
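// Not part of the diff: a sketch (assumed call site, hypothetical label values) of how
// the new progress-trace gauge above could be updated while a batch is running, using
// the standard prometheus GaugeVec API.
fn record_progress_example() {
    MEILISEARCH_BATCH_RUNNING_PROGRESS_TRACE
        .with_label_values(&["42", "indexing documents"])
        .set(0.35); // batch 42 is 35% through the "indexing documents" step
}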
|
||||
|
||||
@@ -473,11 +473,14 @@ pub struct Opt {
|
||||
#[serde(default = "default_limit_batched_tasks")]
|
||||
pub experimental_max_number_of_batched_tasks: usize,
|
||||
|
||||
/// Experimentally reduces the maximum total size, in bytes, of tasks that will be processed at once,
|
||||
/// see: <https://github.com/orgs/meilisearch/discussions/801>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
|
||||
#[serde(default = "default_limit_batched_tasks_total_size")]
|
||||
pub experimental_limit_batched_tasks_total_size: Byte,
|
||||
/// Experimentally controls the maximum total size, in bytes, of tasks that will be processed
|
||||
/// simultaneously. When unspecified, defaults to half of the maximum indexing memory and is
|
||||
/// clamped to 10 GiB.
|
||||
///
|
||||
/// See: <https://github.com/orgs/meilisearch/discussions/801>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE)]
|
||||
#[serde(default)]
|
||||
pub experimental_limit_batched_tasks_total_size: Option<Byte>,
|
||||
|
||||
/// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
|
||||
/// distinct embedder.
|
||||
@@ -701,10 +704,12 @@ impl Opt {
|
||||
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
|
||||
experimental_max_number_of_batched_tasks.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
|
||||
experimental_limit_batched_tasks_total_size.to_string(),
|
||||
);
|
||||
if let Some(limit) = experimental_limit_batched_tasks_total_size {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
|
||||
limit.to_string(),
|
||||
);
|
||||
}
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
|
||||
experimental_embedding_cache_entries.to_string(),
|
||||
@@ -1273,10 +1278,6 @@ fn default_limit_batched_tasks() -> usize {
|
||||
usize::MAX
|
||||
}
|
||||
|
||||
fn default_limit_batched_tasks_total_size() -> Byte {
|
||||
Byte::from_u64(u64::MAX)
|
||||
}
|
||||
|
||||
fn default_embedding_cache_entries() -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
use crate::search::{Personalize, SearchResult};
|
||||
use meilisearch_types::{
|
||||
error::{Code, ErrorCode, ResponseError},
|
||||
milli::TimeBudget,
|
||||
};
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
|
||||
use meilisearch_types::milli::TimeBudget;
|
||||
use rand::Rng;
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::search::{Personalize, SearchResult};
|
||||
|
||||
const COHERE_API_URL: &str = "https://api.cohere.ai/v1/rerank";
|
||||
const MAX_RETRIES: u32 = 10;
|
||||
|
||||
|
||||
crates/meilisearch/src/proxy/body.rs (new file, 43 lines)
@@ -0,0 +1,43 @@
|
||||
use std::fs::File;
|
||||
|
||||
use meilisearch_types::network::Remote;
|
||||
|
||||
pub enum Body<T, F>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
F: FnMut(&str, &Remote, &mut T),
|
||||
{
|
||||
NdJsonPayload(File),
|
||||
Inline(T),
|
||||
Generated(T, F),
|
||||
None,
|
||||
}
|
||||
|
||||
impl Body<(), fn(&str, &Remote, &mut ())> {
|
||||
pub fn with_ndjson_payload(file: File) -> Self {
|
||||
Self::NdJsonPayload(file)
|
||||
}
|
||||
|
||||
pub fn none() -> Self {
|
||||
Self::None
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Body<T, fn(&str, &Remote, &mut T)>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
{
|
||||
pub fn inline(payload: T) -> Self {
|
||||
Self::Inline(payload)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, F> Body<T, F>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
F: FnMut(&str, &Remote, &mut T),
|
||||
{
|
||||
pub fn generated(initial: T, f: F) -> Self {
|
||||
Self::Generated(initial, f)
|
||||
}
|
||||
}
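// Not part of the diff: a minimal usage sketch of the constructors above. The JSON
// payload and the "payload.ndjson" path are hypothetical.
fn body_examples() -> std::io::Result<()> {
    let _no_body = Body::none();
    let _inline = Body::inline(serde_json::json!({ "q": "hello" }));
    let _ndjson = Body::with_ndjson_payload(std::fs::File::open("payload.ndjson")?);
    Ok(())
}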
|
||||
crates/meilisearch/src/proxy/community_edition.rs (new file, 31 lines)
@@ -0,0 +1,31 @@
|
||||
use actix_web::HttpRequest;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::network::{Network, Remote};
|
||||
use meilisearch_types::tasks::network::{DbTaskNetwork, TaskNetwork};
|
||||
use meilisearch_types::tasks::Task;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::proxy::Body;
|
||||
|
||||
pub fn task_network_and_check_leader_and_version(
|
||||
_req: &HttpRequest,
|
||||
_network: &Network,
|
||||
) -> Result<Option<TaskNetwork>, MeilisearchHttpError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub async fn proxy<T, F>(
|
||||
_index_scheduler: &IndexScheduler,
|
||||
_index_uid: Option<&str>,
|
||||
_req: &HttpRequest,
|
||||
_task_network: DbTaskNetwork,
|
||||
_network: Network,
|
||||
_body: Body<T, F>,
|
||||
task: &Task,
|
||||
) -> Result<Task, MeilisearchHttpError>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
F: FnMut(&str, &Remote, &mut T),
|
||||
{
|
||||
Ok(task.clone())
|
||||
}
|
||||
crates/meilisearch/src/proxy/enterprise_edition.rs (new file, 618 lines)
@@ -0,0 +1,618 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::HttpRequest;
|
||||
use bytes::Bytes;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::network::Remote;
|
||||
use meilisearch_types::tasks::network::headers::{GetHeader, SetHeader};
|
||||
use meilisearch_types::tasks::network::{
|
||||
DbTaskNetwork, ImportData, ImportMetadata, Origin, TaskNetwork,
|
||||
};
|
||||
use meilisearch_types::tasks::{Task, TaskId};
|
||||
use reqwest::{RequestBuilder, StatusCode};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::proxy::{Body, ProxyError, ReqwestErrorWithoutUrl};
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
mod timeouts {
|
||||
use std::sync::LazyLock;
|
||||
|
||||
pub static CONNECT_SECONDS: LazyLock<u64> =
|
||||
LazyLock::new(|| fetch_or_default("MEILI_EXPERIMENTAL_PROXY_CONNECT_TIMEOUT_SECONDS", 3));
|
||||
|
||||
pub static BACKOFF_SECONDS: LazyLock<u64> =
|
||||
LazyLock::new(|| fetch_or_default("MEILI_EXPERIMENTAL_PROXY_BACKOFF_TIMEOUT_SECONDS", 25));
|
||||
|
||||
pub static REQUEST_SECONDS: LazyLock<u64> =
|
||||
LazyLock::new(|| fetch_or_default("MEILI_EXPERIMENTAL_PROXY_REQUEST_TIMEOUT_SECONDS", 30));
|
||||
|
||||
fn fetch_or_default(key: &str, default: u64) -> u64 {
|
||||
match std::env::var(key) {
|
||||
Ok(timeout) => timeout.parse().unwrap_or_else(|_| {
|
||||
panic!("`{key}` environment variable is not parseable as an integer: {timeout}")
|
||||
}),
|
||||
Err(std::env::VarError::NotPresent) => default,
|
||||
Err(std::env::VarError::NotUnicode(_)) => {
|
||||
panic!("`{key}` environment variable is not set to a integer")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, F> Body<T, F>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
F: FnMut(&str, &Remote, &mut T),
|
||||
{
|
||||
pub fn into_bytes_iter(
|
||||
self,
|
||||
remotes: impl IntoIterator<Item = (String, Remote)>,
|
||||
) -> Result<
|
||||
impl Iterator<Item = (Option<Bytes>, (String, Remote))>,
|
||||
meilisearch_types::milli::Error,
|
||||
> {
|
||||
let bytes = match self {
|
||||
Body::NdJsonPayload(file) => {
|
||||
Some(Bytes::from_owner(unsafe { memmap2::Mmap::map(&file)? }))
|
||||
}
|
||||
|
||||
Body::Inline(payload) => {
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
|
||||
}
|
||||
|
||||
Body::None => None,
|
||||
|
||||
Body::Generated(mut initial, mut f) => {
|
||||
return Ok(either::Right(remotes.into_iter().map(move |(name, remote)| {
|
||||
f(&name, &remote, &mut initial);
|
||||
let bytes =
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&initial).unwrap()));
|
||||
(bytes, (name, remote))
|
||||
})));
|
||||
}
|
||||
};
|
||||
Ok(either::Left(std::iter::repeat(bytes).zip(remotes)))
|
||||
}
|
||||
|
||||
pub fn into_bytes(
|
||||
self,
|
||||
remote_name: &str,
|
||||
remote: &Remote,
|
||||
) -> Result<Option<Bytes>, meilisearch_types::milli::Error> {
|
||||
Ok(match self {
|
||||
Body::NdJsonPayload(file) => {
|
||||
Some(Bytes::from_owner(unsafe { memmap2::Mmap::map(&file)? }))
|
||||
}
|
||||
|
||||
Body::Inline(payload) => {
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
|
||||
}
|
||||
|
||||
Body::None => None,
|
||||
|
||||
Body::Generated(mut initial, mut f) => {
|
||||
f(remote_name, remote, &mut initial);
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&initial).unwrap()))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses the header to determine if this task is a duplicate and originates with a remote.
|
||||
///
|
||||
/// If not, checks whether this remote is the leader and returns `MeilisearchHttpError::NotLeader` if it is not.
|
||||
///
|
||||
/// If there is no leader, returns `Ok(None)`
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// - `MeilisearchHttpError::NotLeader`: if the following are true simultaneously:
|
||||
/// 1. The task originates with the current node
|
||||
/// 2. There's a declared `leader`
|
||||
/// 3. The declared leader is **not** the current node
|
||||
/// - `MeilisearchHttpError::InvalidHeaderValue`: if headers cannot be parsed as a task network.
|
||||
/// - `MeilisearchHttpError::InconsistentTaskNetworkHeaders`: if only some of the headers are present.
|
||||
pub fn task_network_and_check_leader_and_version(
|
||||
req: &HttpRequest,
|
||||
network: &meilisearch_types::network::Network,
|
||||
) -> Result<Option<TaskNetwork>, MeilisearchHttpError> {
|
||||
let task_network =
|
||||
match (origin_from_req(req)?, import_data_from_req(req)?, import_metadata_from_req(req)?) {
|
||||
(Some(network_change), Some(import_from), Some(metadata)) => {
|
||||
TaskNetwork::Import { import_from, network_change, metadata }
|
||||
}
|
||||
(Some(origin), None, None) => TaskNetwork::Origin { origin },
|
||||
(None, None, None) => {
|
||||
match (network.leader.as_deref(), network.local.as_deref()) {
|
||||
// 1. Always allowed if there is no leader
|
||||
(None, _) => return Ok(None),
|
||||
// 2. Allowed if the leader is self
|
||||
(Some(leader), Some(this)) if leader == this => (),
|
||||
// 3. Any other change is disallowed
|
||||
(Some(leader), _) => {
|
||||
return Err(MeilisearchHttpError::NotLeader { leader: leader.to_string() })
|
||||
}
|
||||
}
|
||||
|
||||
TaskNetwork::Remotes {
|
||||
remote_tasks: Default::default(),
|
||||
network_version: network.version,
|
||||
}
|
||||
}
|
||||
// all good cases were matched, so this is always an error
|
||||
(origin, import_from, metadata) => {
|
||||
return Err(MeilisearchHttpError::InconsistentTaskNetworkHeaders {
|
||||
is_missing_origin: origin.is_none(),
|
||||
is_missing_import: import_from.is_none(),
|
||||
is_missing_import_metadata: metadata.is_none(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
if task_network.network_version() < network.version {
|
||||
return Err(MeilisearchHttpError::NetworkVersionTooOld {
|
||||
received: task_network.network_version(),
|
||||
expected_at_least: network.version,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Some(task_network))
|
||||
}
|
||||
|
||||
/// Updates the task description and, if necessary, proxies the passed request to the other remotes of the network.
|
||||
///
|
||||
/// This function reads the custom headers from the request to determine whether it must proxy the request or whether the request
|
||||
/// has already been proxied.
|
||||
///
|
||||
/// - when it must proxy the request, the endpoint, method and query params are retrieved from the passed `req`, then the `body` is
|
||||
/// sent to all remotes of the `network` (except `self`). The responses from the remotes are collected to update the passed `task`
|
||||
/// with the task ids from the task queues of the remotes.
|
||||
/// - when the request has already been proxied, the custom headers contain information about the remote that created the initial task.
|
||||
/// This information is copied to the passed task.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// The updated task. The task is read back from the database to avoid erasing concurrent changes.
|
||||
pub async fn proxy<T, F>(
|
||||
index_scheduler: &IndexScheduler,
|
||||
index_uid: Option<&str>,
|
||||
req: &HttpRequest,
|
||||
mut task_network: DbTaskNetwork,
|
||||
network: meilisearch_types::network::Network,
|
||||
body: Body<T, F>,
|
||||
task: &Task,
|
||||
) -> Result<Task, MeilisearchHttpError>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
F: FnMut(&str, &Remote, &mut T),
|
||||
{
|
||||
if let DbTaskNetwork::Remotes { remote_tasks, network_version } = &mut task_network {
|
||||
let network_version = *network_version;
|
||||
let this = network
|
||||
.local
|
||||
.as_deref()
|
||||
.expect("inconsistent `network.leader` and `network.self`")
|
||||
.to_owned();
|
||||
|
||||
let content_type = match &body {
|
||||
// for file bodies, force x-ndjson
|
||||
Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()),
|
||||
// otherwise get content type from request
|
||||
_ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()),
|
||||
};
|
||||
|
||||
let mut in_flight_remote_queries = BTreeMap::new();
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_secs(*timeouts::CONNECT_SECONDS))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let method = from_old_http_method(req.method());
|
||||
|
||||
// send payload to all remotes
|
||||
for (body, (node_name, node)) in body
|
||||
.into_bytes_iter(network.remotes.into_iter().filter(|(name, _)| name.as_str() != this))
|
||||
.map_err(|err| {
|
||||
MeilisearchHttpError::from_milli(err, index_uid.map(ToOwned::to_owned))
|
||||
})?
|
||||
{
|
||||
tracing::trace!(node_name, "proxying task to remote");
|
||||
|
||||
let client = client.clone();
|
||||
let api_key = node.write_api_key;
|
||||
let this = this.clone();
|
||||
let task_uid = task.uid;
|
||||
let method = method.clone();
|
||||
let path_and_query = req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/");
|
||||
|
||||
in_flight_remote_queries.insert(
|
||||
node_name,
|
||||
tokio::spawn({
|
||||
let url = format!("{}{}", node.url, path_and_query);
|
||||
|
||||
let content_type = content_type.map(|b| b.to_owned());
|
||||
|
||||
let backoff = backoff::ExponentialBackoffBuilder::new()
|
||||
.with_max_elapsed_time(Some(std::time::Duration::from_secs(
|
||||
*timeouts::BACKOFF_SECONDS,
|
||||
)))
|
||||
.build();
|
||||
|
||||
backoff::future::retry(backoff, move || {
|
||||
let url = url.clone();
|
||||
let client = client.clone();
|
||||
let this = this.clone();
|
||||
let content_type = content_type.clone();
|
||||
|
||||
let body = body.clone();
|
||||
let api_key = api_key.clone();
|
||||
let method = method.clone();
|
||||
|
||||
async move {
|
||||
try_proxy(
|
||||
method,
|
||||
&url,
|
||||
content_type.as_deref(),
|
||||
network_version,
|
||||
api_key.as_deref(),
|
||||
&client,
|
||||
&this,
|
||||
task_uid,
|
||||
body,
|
||||
)
|
||||
.await
|
||||
}
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
// wait for all in-flight queries to finish and collect their results
|
||||
for (node_name, handle) in in_flight_remote_queries {
|
||||
match handle.await {
|
||||
Ok(Ok(res)) => {
|
||||
let task_uid = res.task_uid;
|
||||
|
||||
remote_tasks.insert(node_name, Ok(task_uid).into());
|
||||
}
|
||||
Ok(Err(error)) => {
|
||||
remote_tasks.insert(node_name, Err(error.as_response_error()).into());
|
||||
}
|
||||
Err(panic) => match panic.try_into_panic() {
|
||||
Ok(panic) => {
|
||||
let msg = match panic.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match panic.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
remote_tasks.insert(
|
||||
node_name,
|
||||
Err(ResponseError::from_msg(
|
||||
msg.to_string(),
|
||||
meilisearch_types::error::Code::Internal,
|
||||
))
|
||||
.into(),
|
||||
);
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::error!("proxy task was unexpectedly cancelled")
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(index_scheduler.set_task_network(task.uid, task_network)?)
|
||||
}
|
||||
|
||||
pub async fn send_request<T, F, U>(
|
||||
path_and_query: &str,
|
||||
method: reqwest::Method,
|
||||
content_type: Option<String>,
|
||||
body: Body<T, F>,
|
||||
remote_name: &str,
|
||||
remote: &Remote,
|
||||
) -> Result<U, ProxyError>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
F: FnMut(&str, &Remote, &mut T),
|
||||
U: DeserializeOwned,
|
||||
{
|
||||
let content_type = match &body {
|
||||
// for file bodies, force x-ndjson
|
||||
Body::NdJsonPayload(_) => Some("application/x-ndjson".into()),
|
||||
// otherwise get content type from request
|
||||
_ => content_type,
|
||||
};
|
||||
|
||||
let body = body.into_bytes(remote_name, remote).map_err(Box::new)?;
|
||||
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_secs(*timeouts::CONNECT_SECONDS))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let url = format!("{}{}", remote.url, path_and_query);
|
||||
|
||||
// send payload to remote
|
||||
tracing::trace!(remote_name, "sending request to remote");
|
||||
let api_key = remote.write_api_key.clone();
|
||||
|
||||
let backoff = backoff::ExponentialBackoffBuilder::new()
|
||||
.with_max_elapsed_time(Some(std::time::Duration::from_secs(*timeouts::BACKOFF_SECONDS)))
|
||||
.build();
|
||||
|
||||
backoff::future::retry(backoff, move || {
|
||||
let url = url.clone();
|
||||
let client = client.clone();
|
||||
let content_type = content_type.clone();
|
||||
|
||||
let body = body.clone();
|
||||
let api_key = api_key.clone();
|
||||
let method = method.clone();
|
||||
|
||||
async move {
|
||||
let request = client
|
||||
.request(method, url)
|
||||
.timeout(std::time::Duration::from_secs(*timeouts::REQUEST_SECONDS));
|
||||
let request = if let Some(body) = body { request.body(body) } else { request };
|
||||
let request =
|
||||
if let Some(api_key) = api_key { request.bearer_auth(api_key) } else { request };
|
||||
let request = if let Some(content_type) = content_type {
|
||||
request.header(CONTENT_TYPE.as_str(), content_type)
|
||||
} else {
|
||||
request
|
||||
};
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(error) if error.is_timeout() => {
|
||||
return Err(backoff::Error::transient(ProxyError::Timeout))
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(backoff::Error::transient(ProxyError::CouldNotSendRequest(
|
||||
ReqwestErrorWithoutUrl::new(error),
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
handle_response(response).await
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
async fn handle_response<U>(response: reqwest::Response) -> Result<U, backoff::Error<ProxyError>>
|
||||
where
|
||||
U: DeserializeOwned,
|
||||
{
|
||||
match response.status() {
|
||||
status_code if status_code.is_success() => (),
|
||||
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
|
||||
return Err(backoff::Error::Permanent(ProxyError::AuthenticationError))
|
||||
}
|
||||
status_code if status_code.is_client_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(backoff::Error::Permanent(ProxyError::BadRequest {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code if status_code.is_server_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(backoff::Error::transient(ProxyError::RemoteError {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code => {
|
||||
tracing::warn!(
|
||||
status_code = status_code.as_u16(),
|
||||
"remote replied with unexpected status code"
|
||||
);
|
||||
}
|
||||
}
|
||||
let response: U = match parse_response(response).await {
|
||||
Ok(response) => response,
|
||||
Err(response) => {
|
||||
return Err(backoff::Error::permanent(ProxyError::CouldNotParseResponse { response }))
|
||||
}
|
||||
};
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
fn from_old_http_method(method: &actix_http::Method) -> reqwest::Method {
|
||||
match method {
|
||||
&actix_http::Method::CONNECT => reqwest::Method::CONNECT,
|
||||
&actix_http::Method::DELETE => reqwest::Method::DELETE,
|
||||
&actix_http::Method::GET => reqwest::Method::GET,
|
||||
&actix_http::Method::HEAD => reqwest::Method::HEAD,
|
||||
&actix_http::Method::OPTIONS => reqwest::Method::OPTIONS,
|
||||
&actix_http::Method::PATCH => reqwest::Method::PATCH,
|
||||
&actix_http::Method::POST => reqwest::Method::POST,
|
||||
&actix_http::Method::PUT => reqwest::Method::PUT,
|
||||
&actix_http::Method::TRACE => reqwest::Method::TRACE,
|
||||
method => reqwest::Method::from_bytes(method.as_str().as_bytes()).unwrap(),
|
||||
}
|
||||
}
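// Not part of the diff: this hand-rolled conversion is most likely needed because
// actix-http and reqwest 0.12 are built against different major versions of the `http`
// crate, so their `Method` types are distinct and cannot be converted into one another directly.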
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn try_proxy(
|
||||
method: reqwest::Method,
|
||||
url: &str,
|
||||
content_type: Option<&[u8]>,
|
||||
network_version: Uuid,
|
||||
api_key: Option<&str>,
|
||||
client: &reqwest::Client,
|
||||
this: &str,
|
||||
task_uid: TaskId,
|
||||
body: Option<Bytes>,
|
||||
) -> Result<SummarizedTaskView, backoff::Error<ProxyError>> {
|
||||
let request = client
|
||||
.request(method, url)
|
||||
.timeout(std::time::Duration::from_secs(*timeouts::REQUEST_SECONDS));
|
||||
let request = if let Some(body) = body { request.body(body) } else { request };
|
||||
let request = if let Some(api_key) = api_key { request.bearer_auth(api_key) } else { request };
|
||||
let RequestWrapper(request) = RequestWrapper(request)
|
||||
.set_origin_task_uid(task_uid)
|
||||
.set_origin_network_version(network_version)
|
||||
.set_origin_remote(this);
|
||||
|
||||
let request = if let Some(content_type) = content_type {
|
||||
request.header(CONTENT_TYPE.as_str(), content_type)
|
||||
} else {
|
||||
request
|
||||
};
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(error) if error.is_timeout() => {
|
||||
return Err(backoff::Error::transient(ProxyError::Timeout))
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(backoff::Error::transient(ProxyError::CouldNotSendRequest(
|
||||
ReqwestErrorWithoutUrl::new(error),
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
handle_response(response).await
|
||||
}
|
||||
|
||||
struct RequestWrapper(RequestBuilder);
|
||||
impl meilisearch_types::tasks::network::headers::SetHeader for RequestWrapper {
|
||||
fn set_header(self, name: &str, value: &str) -> Self {
|
||||
Self(self.0.header(name, value))
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_error(response: reqwest::Response) -> Result<String, ReqwestErrorWithoutUrl> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(ReqwestErrorWithoutUrl::new(error)),
|
||||
};
|
||||
|
||||
Ok(parse_bytes_as_error(&bytes))
|
||||
}
|
||||
|
||||
fn parse_bytes_as_error(bytes: &[u8]) -> String {
|
||||
match serde_json::from_slice::<Value>(bytes) {
|
||||
Ok(value) => value.to_string(),
|
||||
Err(_) => String::from_utf8_lossy(bytes).into_owned(),
|
||||
}
|
||||
}
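// Not part of the diff: the expected behaviour of `parse_bytes_as_error` above, namely
// that valid JSON bodies are echoed back compactly while anything else falls back to lossy UTF-8.
fn parse_bytes_as_error_examples() {
    assert_eq!(parse_bytes_as_error(br#"{"message":"boom"}"#), r#"{"message":"boom"}"#);
    assert_eq!(parse_bytes_as_error(b"not json"), "not json");
}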
|
||||
|
||||
async fn parse_response<T: DeserializeOwned>(
|
||||
response: reqwest::Response,
|
||||
) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))),
|
||||
};
|
||||
|
||||
match serde_json::from_slice::<T>(&bytes) {
|
||||
Ok(value) => Ok(value),
|
||||
Err(_) => Err(Ok(parse_bytes_as_error(&bytes))),
|
||||
}
|
||||
}
|
||||
|
||||
struct ResponseWrapper<'a>(&'a HttpRequest);
|
||||
impl<'a> meilisearch_types::tasks::network::headers::GetHeader for ResponseWrapper<'a> {
|
||||
type Error = actix_http::header::ToStrError;
|
||||
|
||||
fn get_header(&self, name: &str) -> Result<Option<&str>, Self::Error> {
|
||||
self.0.headers().get(name).map(|value| value.to_str()).transpose()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn origin_from_req(req: &HttpRequest) -> Result<Option<Origin>, MeilisearchHttpError> {
|
||||
let req = ResponseWrapper(req);
|
||||
let (remote_name, task_uid, network_version) = match (
|
||||
req.get_origin_remote()?,
|
||||
req.get_origin_task_uid()?,
|
||||
req.get_origin_network_version()?,
|
||||
) {
|
||||
(None, None, _) => return Ok(None),
|
||||
(None, Some(_), _) => {
|
||||
return Err(MeilisearchHttpError::InconsistentOriginHeaders { is_remote_missing: true })
|
||||
}
|
||||
(Some(_), None, _) => {
|
||||
return Err(MeilisearchHttpError::InconsistentOriginHeaders {
|
||||
is_remote_missing: false,
|
||||
})
|
||||
}
|
||||
(Some(remote_name), Some(task_uid), network_version) => {
|
||||
(remote_name, task_uid, network_version)
|
||||
}
|
||||
};
|
||||
|
||||
let network_version = network_version.unwrap_or_else(Uuid::nil);
|
||||
|
||||
Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid, network_version }))
|
||||
}
|
||||
|
||||
pub fn import_data_from_req(req: &HttpRequest) -> Result<Option<ImportData>, MeilisearchHttpError> {
|
||||
let req = ResponseWrapper(req);
|
||||
let (remote_name, index_name, document_count) =
|
||||
match (req.get_import_remote()?, req.get_import_index()?, req.get_import_docs()?) {
|
||||
(None, None, None) => return Ok(None),
|
||||
(Some(remote_name), index_name, Some(documents)) => {
|
||||
(remote_name, index_name, documents)
|
||||
}
|
||||
// catch-all pattern that has to contain an inconsistency since we already matched (None, None, None) and (Some, _, Some)
|
||||
(remote_name, index_name, documents) => {
|
||||
return Err(MeilisearchHttpError::InconsistentImportHeaders {
|
||||
is_remote_missing: remote_name.is_none(),
|
||||
is_index_missing: index_name.is_none(),
|
||||
is_docs_missing: documents.is_none(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Some(ImportData {
|
||||
remote_name: remote_name.to_string(),
|
||||
index_name: index_name.map(|index_name| index_name.to_string()),
|
||||
document_count,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn import_metadata_from_req(
|
||||
req: &HttpRequest,
|
||||
) -> Result<Option<ImportMetadata>, MeilisearchHttpError> {
|
||||
let req = ResponseWrapper(req);
|
||||
let (index_count, task_key, total_index_documents) = match (
|
||||
req.get_import_index_count()?,
|
||||
req.get_import_task_key()?,
|
||||
req.get_import_index_docs()?,
|
||||
) {
|
||||
(None, None, None) => return Ok(None),
|
||||
(Some(index_count), task_key, Some(total_index_documents)) => {
|
||||
(index_count, task_key, total_index_documents)
|
||||
}
|
||||
// catch-all pattern that has to contain an inconsistency since we already matched (None, None, None) and (Some, _, Some)
|
||||
(index_count, task_key, total_index_documents) => {
|
||||
return Err(MeilisearchHttpError::InconsistentImportMetadataHeaders {
|
||||
is_index_count_missing: index_count.is_none(),
|
||||
is_task_key_missing: task_key.is_none(),
|
||||
is_total_index_documents_missing: total_index_documents.is_none(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Some(ImportMetadata { index_count, task_key, total_index_documents }))
|
||||
}
|
||||