Compare commits

..

26 Commits

Author | SHA1 | Message | Date
Clément Renault | 0c37ec37c7 | Introduce more databases to measure | 2025-11-25 09:56:32 +01:00
Kerollmops | da3f08a479 | WIP: Clean up tests | 2025-11-10 10:16:49 +01:00
Kerollmops | 5b19df1dba | Fix compressed block size bug | 2025-11-09 12:52:49 +01:00
Kerollmops | 2ca596003a | Fix a decoding bug with flat u32s | 2025-11-09 12:29:45 +01:00
Kerollmops | 9b31c09dde | Fix assert | 2025-11-09 12:07:08 +01:00
Kerollmops | 74a587785a | Finalize a first version | 2025-11-08 17:32:38 +01:00
Clément Renault | d612ea2a90 | Fix the initial value when delta-encoding | 2025-11-01 12:20:25 +01:00
Kerollmops | 63a7fe5586 | Fix a delta encoding bug | 2025-10-31 14:16:22 +01:00
Kerollmops | 53120eb2a4 | Add an info about num bits for delta encoding | 2025-10-31 13:58:02 +01:00
Kerollmops | 19e512622e | Average distance between bitmap values | 2025-10-31 13:08:23 +01:00
Kerollmops | 86e5f74fce | Fix some display issues | 2025-10-31 12:00:54 +01:00
Kerollmops | a73f635013 | Fix dumb issue | 2025-10-31 11:50:42 +01:00
Kerollmops | 10aac4d77f | Add a progress bar to the meilitool bitmap measurements | 2025-10-31 11:45:19 +01:00
Kerollmops | aa2f649713 | Use multiple threads to compute bitmap stats | 2025-10-31 11:38:54 +01:00
Clément Renault | a1f266dc03 | Add average number of values metric to roaring bitmap analysis | 2025-10-31 09:39:09 +01:00
Clément Renault | 566bb51eda | Indicated the number of CBOs | 2025-10-30 22:08:27 +01:00
Clément Renault | c37396714d | Add more info | 2025-10-30 11:59:07 +01:00
Clément Renault | c5473dc2b5 | More logs | 2025-10-29 18:09:48 +01:00
Clément Renault | 3cdc7f2de4 | Evaluate delta encoding | 2025-10-29 18:07:07 +01:00
Clément Renault | 343bae478a | Display more info about the new bitmap infos | 2025-10-29 10:04:58 +01:00
Clément Renault | 8b41f1a69d | Show the percentage in percent | 2025-10-29 09:58:22 +01:00
Clément Renault | 59a2f8d0ab | Adding new commands to meilitool | 2025-10-28 18:44:45 +01:00
Clément Renault | 508be2137e | Compute the ratio of bitset containers | 2025-10-28 15:32:32 +01:00
Clément Renault | 50bf485dc0 | Reduce the number of displayed decimals | 2025-10-28 15:17:13 +01:00
Clément Renault | 6e4855bbc5 | Adjuste bytes to ease reading | 2025-10-28 15:14:00 +01:00
Clément Renault | ac5da77746 | Add a meilitool command to compute the gain to use new roaring bitmaps | 2025-10-28 11:58:31 +01:00
376 changed files with 5536 additions and 17418 deletions

View File

@@ -24,11 +24,6 @@ TBD
 - [ ] If not, add the `no db change` label to your PR, and you're good to merge.
 - [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining you what to do.
-### Reminders when adding features
-- [ ] Write unit tests using insta
-- [ ] Write declarative integration tests in [workloads/tests](https://github.com/meilisearch/meilisearch/tree/main/workloads/test). Specify the routes to call and then call `cargo xtask test workloads/tests/YOUR_TEST.json --update-responses` so that responses are automatically filled.
 ### Reminders when modifying the API
 - [ ] Update the openAPI file with utoipa:

View File

@@ -18,7 +18,7 @@ jobs:
 timeout-minutes: 180 # 3h
 steps:
 - uses: actions/checkout@v5
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
 with:
 profile: minimal

View File

@@ -66,7 +66,9 @@ jobs:
 fetch-depth: 0 # fetch full history to be able to get main commit sha
 ref: ${{ steps.comment-branch.outputs.head_ref }}
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
+with:
+profile: minimal
 - name: Run benchmarks on PR ${{ github.event.issue.id }}
 run: |

View File

@@ -12,7 +12,9 @@ jobs:
 timeout-minutes: 180 # 3h
 steps:
 - uses: actions/checkout@v5
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
+with:
+profile: minimal
 # Run benchmarks
 - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}

View File

@@ -18,7 +18,7 @@ jobs:
 timeout-minutes: 4320 # 72h
 steps:
 - uses: actions/checkout@v5
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
 with:
 profile: minimal

View File

@@ -44,7 +44,7 @@ jobs:
 exit 1
 fi
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
 with:
 profile: minimal

View File

@@ -16,7 +16,7 @@ jobs:
 timeout-minutes: 4320 # 72h
 steps:
 - uses: actions/checkout@v5
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
 with:
 profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
 runs-on: benchmarks
 steps:
 - uses: actions/checkout@v5
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
 with:
 profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
 runs-on: benchmarks
 steps:
 - uses: actions/checkout@v5
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
 with:
 profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
 runs-on: benchmarks
 steps:
 - uses: actions/checkout@v5
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
 with:
 profile: minimal

View File

@@ -19,7 +19,6 @@ env:
 - [ ] Detail the change to the DB format and why they are forward compatible
 - [ ] Forward-compatibility: A database created before this PR and using the features touched by this PR was able to be opened by a Meilisearch produced by the code of this PR.
-- [ ] Declarative test: add a [declarative test containing a dumpless upgrade](https://github.com/meilisearch/meilisearch/blob/main/TESTING.md#typical-usage)
 ## This PR makes breaking changes
@@ -36,7 +35,8 @@ env:
 - [ ] Write the code to go from the old database to the new one
 - If the change happened in milli, the upgrade function should be written and called [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/milli/src/update/upgrade/mod.rs#L24-L47)
 - If the change happened in the index-scheduler, we've never done it yet, but the right place to do it should be [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/index-scheduler/src/scheduler/process_upgrade/mod.rs#L13)
-- [ ] Declarative test: add a [declarative test containing a dumpless upgrade](https://github.com/meilisearch/meilisearch/blob/main/TESTING.md#typical-usage)
+- [ ] Write an integration test [here](https://github.com/meilisearch/meilisearch/blob/main/crates/meilisearch/tests/upgrade/mod.rs) ensuring you can read the old database, upgrade to the new database, and read the new database as expected
 jobs:
 add-comment:

View File

@@ -3,7 +3,7 @@ name: Look for flaky tests
 on:
 workflow_dispatch:
 schedule:
-- cron: "0 4 * * *" # Every day at 4:00AM
+- cron: '0 4 * * *' # Every day at 4:00AM
 jobs:
 flaky:
@@ -13,17 +13,11 @@
 image: ubuntu:22.04
 steps:
 - uses: actions/checkout@v5
-- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
-run: |
-sudo rm -rf "/opt/ghc" || true
-sudo rm -rf "/usr/share/dotnet" || true
-sudo rm -rf "/usr/local/lib/android" || true
-sudo rm -rf "/usr/local/share/boost" || true
 - name: Install needed dependencies
 run: |
 apt-get update && apt-get install -y curl
 apt-get install build-essential -y
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
 - name: Install cargo-flaky
 run: cargo install cargo-flaky
 - name: Run cargo flaky in the dumps

View File

@@ -12,7 +12,9 @@ jobs:
 timeout-minutes: 4320 # 72h
 steps:
 - uses: actions/checkout@v5
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
+with:
+profile: minimal
 # Run benchmarks
 - name: Run the fuzzer

View File

@@ -25,13 +25,7 @@ jobs:
 run: |
 apt-get update && apt-get install -y curl
 apt-get install build-essential -y
-- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
-run: |
-sudo rm -rf "/opt/ghc" || true
-sudo rm -rf "/usr/share/dotnet" || true
-sudo rm -rf "/usr/local/lib/android" || true
-sudo rm -rf "/usr/local/share/boost" || true
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
 - name: Install cargo-deb
 run: cargo install cargo-deb
 - uses: actions/checkout@v5

View File

@@ -14,105 +14,10 @@ on:
workflow_dispatch: workflow_dispatch:
jobs: jobs:
build: docker:
runs-on: ${{ matrix.runner }} runs-on: docker
strategy:
matrix:
platform: [amd64, arm64]
edition: [community, enterprise]
include:
- platform: amd64
runner: ubuntu-24.04
- platform: arm64
runner: ubuntu-24.04-arm
- edition: community
registry: getmeili/meilisearch
feature-flag: ""
- edition: enterprise
registry: getmeili/meilisearch-enterprise
feature-flag: "--features enterprise"
permissions: {}
steps:
- uses: actions/checkout@v5
- name: Prepare
run: |
platform=linux/${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
platforms: linux/${{ matrix.platform }}
install: true
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ matrix.registry }}
# Prevent `latest` to be updated for each new tag pushed.
# We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases.
flavor: latest=false
tags: |
type=ref,event=tag
type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Build and push by digest
uses: docker/build-push-action@v6
id: build-and-push
with:
platforms: linux/${{ matrix.platform }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ matrix.registry }}
outputs: type=image,push-by-digest=true,name-canonical=true,push=true
build-args: |
COMMIT_SHA=${{ github.sha }}
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
EXTRA_ARGS=${{ matrix.feature-flag }}
- name: Export digest
run: |
mkdir -p ${{ runner.temp }}/digests
digest="${{ steps.build-and-push.outputs.digest }}"
touch "${{ runner.temp }}/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: digests-${{ matrix.edition }}-${{ env.PLATFORM_PAIR }}
path: ${{ runner.temp }}/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
strategy:
matrix:
edition: [community, enterprise]
include:
- edition: community
registry: getmeili/meilisearch
- edition: enterprise
registry: getmeili/meilisearch-enterprise
needs:
- build
permissions: permissions:
id-token: write # This is needed to use Cosign in keyless mode id-token: write # This is needed to use Cosign in keyless mode
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
@@ -153,30 +58,26 @@ jobs:
echo "date=$commit_date" >> $GITHUB_OUTPUT echo "date=$commit_date" >> $GITHUB_OUTPUT
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Install cosign - name: Install cosign
uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0 uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0
- name: Download digests
uses: actions/download-artifact@v4
with:
path: ${{ runner.temp }}/digests
pattern: digests-${{ matrix.edition }}-*
merge-multiple: true
- name: Login to Docker Hub - name: Login to Docker Hub
uses: docker/login-action@v3 uses: docker/login-action@v3
with: with:
username: ${{ secrets.DOCKERHUB_USERNAME }} username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }} password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta - name: Docker meta
id: meta id: meta
uses: docker/metadata-action@v5 uses: docker/metadata-action@v5
with: with:
images: ${{ matrix.registry }} images: getmeili/meilisearch
# Prevent `latest` to be updated for each new tag pushed. # Prevent `latest` to be updated for each new tag pushed.
# We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases. # We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases.
flavor: latest=false flavor: latest=false
@@ -187,31 +88,33 @@ jobs:
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }} type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }} type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Create manifest list and push - name: Build and push
working-directory: ${{ runner.temp }}/digests uses: docker/build-push-action@v6
run: | id: build-and-push
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ with:
$(printf '${{ matrix.registry }}@sha256:%s ' *) push: true
platforms: linux/amd64,linux/arm64
- name: Inspect image to fetch digest to sign tags: ${{ steps.meta.outputs.tags }}
run: | build-args: |
digest=$(docker buildx imagetools inspect --format='{{ json .Manifest }}' ${{ matrix.registry }}:${{ steps.meta.outputs.version }} | jq -r '.digest') COMMIT_SHA=${{ github.sha }}
echo "DIGEST=${digest}" >> $GITHUB_ENV COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
- name: Sign the images with GitHub OIDC Token - name: Sign the images with GitHub OIDC Token
env: env:
DIGEST: ${{ steps.build-and-push.outputs.digest }}
TAGS: ${{ steps.meta.outputs.tags }} TAGS: ${{ steps.meta.outputs.tags }}
run: | run: |
images="" images=""
for tag in ${TAGS}; do for tag in ${TAGS}; do
images+="${tag}@${{ env.DIGEST }} " images+="${tag}@${DIGEST} "
done done
cosign sign --yes ${images} cosign sign --yes ${images}
# /!\ Don't touch this without checking with engineers working on the Cloud code base on #discussion-engineering Slack channel # /!\ Don't touch this without checking with Cloud team
- name: Notify meilisearch-cloud - name: Send CI information to Cloud team
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event) # Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
if: ${{ (github.event_name == 'push') && (matrix.edition == 'enterprise') }} if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@v3 uses: peter-evans/repository-dispatch@v3
with: with:
token: ${{ secrets.MEILI_BOT_GH_PAT }} token: ${{ secrets.MEILI_BOT_GH_PAT }}
@@ -219,13 +122,21 @@ jobs:
event-type: cloud-docker-build event-type: cloud-docker-build
client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }' client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }'
# /!\ Don't touch this without checking with integration team members on #discussion-integrations Slack channel # Send notification to Swarmia to notify of a deployment: https://app.swarmia.com
- name: Notify meilisearch-kubernetes # - name: 'Setup jq'
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event), or if not stable # uses: dcarbone/install-jq-action
if: ${{ github.event_name == 'push' && matrix.edition == 'community' && steps.check-tag-format.outputs.stable == 'true' }} # - name: Send deployment to Swarmia
uses: peter-evans/repository-dispatch@v3 # if: github.event_name == 'push' && success()
with: # run: |
token: ${{ secrets.MEILI_BOT_GH_PAT }} # JSON_STRING=$( jq --null-input --compact-output \
repository: meilisearch/meilisearch-kubernetes # --arg version "${{ github.ref_name }}" \
event-type: meilisearch-release # --arg appName "meilisearch" \
client-payload: '{ "version": "${{ github.ref_name }}" }' # --arg environment "production" \
# --arg commitSha "${{ github.sha }}" \
# --arg repositoryFullName "${{ github.repository }}" \
# '{"version": $version, "appName": $appName, "environment": $environment, "commitSha": $commitSha, "repositoryFullName": $repositoryFullName}' )
# curl -H "Authorization: ${{ secrets.SWARMIA_DEPLOYMENTS_AUTHORIZATION }}" \
# -H "Content-Type: application/json" \
# -d "$JSON_STRING" \
# https://hook.swarmia.com/deployments

View File

@@ -32,65 +32,161 @@ jobs:
if: github.event_name == 'release' && steps.check-tag-format.outputs.stable == 'true' if: github.event_name == 'release' && steps.check-tag-format.outputs.stable == 'true'
run: bash .github/scripts/check-release.sh run: bash .github/scripts/check-release.sh
publish-binaries: publish-linux:
name: Publish binary for ${{ matrix.release }} ${{ matrix.edition }} edition name: Publish binary for Linux
runs-on: ${{ matrix.os }} runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
edition: [community, enterprise]
release:
[macos-amd64, macos-aarch64, windows, linux-amd64, linux-aarch64]
include:
- edition: "community"
feature-flag: ""
edition-suffix: ""
- edition: "enterprise"
feature-flag: "--features enterprise"
edition-suffix: "enterprise-"
- release: macos-amd64
os: macos-15-intel
binary_path: release/meilisearch
asset_name: macos-amd64
extra-args: ""
- release: macos-aarch64
os: macos-14
binary_path: aarch64-apple-darwin/release/meilisearch
asset_name: macos-apple-silicon
extra-args: "--target aarch64-apple-darwin"
- release: windows
os: windows-2022
binary_path: release/meilisearch.exe
asset_name: windows-amd64.exe
extra-args: ""
- release: linux-amd64
os: ubuntu-22.04
binary_path: x86_64-unknown-linux-gnu/release/meilisearch
asset_name: linux-amd64
extra-args: "--target x86_64-unknown-linux-gnu"
- release: linux-aarch64
os: ubuntu-22.04-arm
binary_path: aarch64-unknown-linux-gnu/release/meilisearch
asset_name: linux-aarch64
extra-args: "--target aarch64-unknown-linux-gnu"
needs: check-version needs: check-version
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.91.1 - name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.89
- name: Build - name: Build
run: cargo build --release --locked ${{ matrix.feature-flag }} ${{ matrix.extra-args }} run: cargo build --release --locked
# No need to upload binaries for dry run (cron or workflow_dispatch) # No need to upload binaries for dry run (cron or workflow_dispatch)
- name: Upload binaries to release - name: Upload binaries to release
if: github.event_name == 'release' if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2 uses: svenstaro/upload-release-action@2.11.2
with: with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.binary_path }} file: target/release/meilisearch
asset_name: meilisearch-${{ matrix.edition-suffix }}${{ matrix.asset_name }} asset_name: meilisearch-linux-amd64
tag: ${{ github.ref }} tag: ${{ github.ref }}
publish-openapi-files: publish-macos-windows:
name: Publish OpenAPI files name: Publish binary for ${{ matrix.os }}
runs-on: ${{ matrix.os }}
needs: check-version
strategy:
fail-fast: false
matrix:
os: [macos-13, windows-2022]
include:
- os: macos-13
artifact_name: meilisearch
asset_name: meilisearch-macos-amd64
- os: windows-2022
artifact_name: meilisearch.exe
asset_name: meilisearch-windows-amd64.exe
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron or workflow_dispatch)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-macos-apple-silicon:
name: Publish binary for macOS silicon
runs-on: macos-13
needs: check-version
strategy:
matrix:
include:
- target: aarch64-apple-darwin
asset_name: meilisearch-macos-apple-silicon
steps:
- name: Checkout repository
uses: actions/checkout@v5
- name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
target: ${{ matrix.target }}
- name: Cargo build
uses: actions-rs/cargo@v1
with:
command: build
args: --release --target ${{ matrix.target }}
- name: Upload the binary to release
# No need to upload binaries for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-aarch64:
name: Publish binary for aarch64
runs-on: ubuntu-latest
needs: check-version
env:
DEBIAN_FRONTEND: noninteractive
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
strategy:
matrix:
include:
- target: aarch64-unknown-linux-gnu
asset_name: meilisearch-linux-aarch64
steps:
- name: Checkout repository
uses: actions/checkout@v5
- name: Install needed dependencies
run: |
apt-get update -y && apt upgrade -y
apt-get install -y curl build-essential gcc-aarch64-linux-gnu
- name: Set up Docker for cross compilation
run: |
apt-get install -y curl apt-transport-https ca-certificates software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
target: ${{ matrix.target }}
- name: Configure target aarch64 GNU
## Environment variable is not passed using env:
## LD gold won't work with MUSL
# env:
# JEMALLOC_SYS_WITH_LG_PAGE: 16
# RUSTFLAGS: '-Clink-arg=-fuse-ld=gold'
run: |
echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
- name: Install a default toolchain that will be used to build cargo cross
run: |
rustup default stable
- name: Cargo build
uses: actions-rs/cargo@v1
with:
command: build
use-cross: true
args: --release --target ${{ matrix.target }}
env:
CROSS_DOCKER_IN_DOCKER: true
- name: List target output files
run: ls -lR ./target
- name: Upload the binary to release
# No need to upload binaries for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-openapi-file:
name: Publish OpenAPI file
needs: check-version needs: check-version
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
@@ -101,26 +197,16 @@ jobs:
with: with:
toolchain: stable toolchain: stable
override: true override: true
- name: Generate OpenAPI files - name: Generate OpenAPI file
run: | run: |
cd crates/openapi-generator cd crates/openapi-generator
cargo run --release -- --pretty --debug --output ../../meilisearch-openapi.json cargo run --release -- --pretty --output ../../meilisearch.json
cargo run --release -- --pretty --debug --with-mintlify-code-samples --output ../../meilisearch-openapi-mintlify.json - name: Upload OpenAPI to Release
- name: Upload OpenAPI file to Release
# No need to upload for dry run (cron or workflow_dispatch) # No need to upload for dry run (cron or workflow_dispatch)
if: github.event_name == 'release' if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2 uses: svenstaro/upload-release-action@2.11.2
with: with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: ./meilisearch-openapi.json file: ./meilisearch.json
asset_name: meilisearch-openapi.json asset_name: meilisearch-openapi.json
tag: ${{ github.ref }} tag: ${{ github.ref }}
- name: Upload Mintlify OpenAPI file to Release
# No need to upload for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: ./meilisearch-openapi-mintlify.json
asset_name: meilisearch-openapi-mintlify.json
tag: ${{ github.ref }}

View File

@@ -25,18 +25,14 @@ jobs:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- name: Define the Docker image we need to use - name: Define the Docker image we need to use
id: define-image id: define-image
env:
EVENT_NAME: ${{ github.event_name }}
DOCKER_IMAGE_INPUT: ${{ github.event.inputs.docker_image }}
run: | run: |
event=${{ github.event_name }}
echo "docker-image=nightly" >> $GITHUB_OUTPUT echo "docker-image=nightly" >> $GITHUB_OUTPUT
if [[ "$EVENT_NAME" == 'workflow_dispatch' ]]; then if [[ $event == 'workflow_dispatch' ]]; then
echo "docker-image=$DOCKER_IMAGE_INPUT" >> $GITHUB_OUTPUT echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
fi fi
- name: Docker image is ${{ steps.define-image.outputs.docker-image }} - name: Docker image is ${{ steps.define-image.outputs.docker-image }}
env: run: echo "Docker image is ${{ steps.define-image.outputs.docker-image }}"
DOCKER_IMAGE: ${{ steps.define-image.outputs.docker-image }}
run: echo "Docker image is $DOCKER_IMAGE"
########## ##########
## SDKs ## ## SDKs ##
@@ -72,7 +68,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -96,7 +92,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -126,7 +122,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -153,7 +149,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -188,7 +184,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -217,7 +213,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -242,7 +238,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -267,7 +263,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -288,7 +284,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -311,7 +307,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -342,7 +338,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -374,7 +370,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
meilisearch: meilisearch:
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }} image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env: env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}

View File

@@ -15,124 +15,83 @@ env:
jobs: jobs:
test-linux: test-linux:
name: Tests on ${{ matrix.runner }} ${{ matrix.features }} name: Tests on ubuntu-22.04
runs-on: ${{ matrix.runner }} runs-on: ubuntu-latest
strategy: container:
matrix: # Use ubuntu-22.04 to compile with glibc 2.35
runner: [ubuntu-22.04, ubuntu-22.04-arm] image: ubuntu:22.04
features: ["", "--features enterprise"]
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- name: check free space before - name: Install needed dependencies
run: df -h
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: | run: |
sudo rm -rf "/opt/ghc" || true apt-get update && apt-get install -y curl
sudo rm -rf "/usr/share/dotnet" || true apt-get install build-essential -y
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: check free space after
run: df -h
- name: Setup test with Rust stable - name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.91.1 uses: dtolnay/rust-toolchain@1.89
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0 uses: Swatinem/rust-cache@v2.8.0
with: - name: Run cargo check without any default features
key: ${{ matrix.features }}
- name: Run cargo build without any default features
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
command: build command: build
args: --locked --no-default-features --all args: --locked --release --no-default-features --all
- name: Run cargo test - name: Run cargo test
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
command: test command: test
args: --locked --all ${{ matrix.features }} args: --locked --release --all
test-windows: test-others:
name: Tests on ${{ matrix.os }} name: Tests on ${{ matrix.os }}
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
os: [windows-2022] os: [macos-13, windows-2022]
features: ["", "--features enterprise"]
if: github.event_name != 'merge_group'
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0 uses: Swatinem/rust-cache@v2.8.0
- uses: dtolnay/rust-toolchain@1.91.1 - uses: dtolnay/rust-toolchain@1.89
- name: Run cargo build without any default features - name: Run cargo check without any default features
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
command: build command: build
args: --locked --no-default-features --all args: --locked --release --no-default-features --all
- name: Run cargo test - name: Run cargo test
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
command: test command: test
args: --locked --all ${{ matrix.features }} args: --locked --release --all
test-macos:
name: Tests on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-14]
features: ["", "--features enterprise"]
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v5
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run cargo build without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --all ${{ matrix.features }}
test-all-features: test-all-features:
name: Tests almost all features name: Tests almost all features
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709 - name: Install needed dependencies
run: | run: |
sudo rm -rf "/opt/ghc" || true apt-get update
sudo rm -rf "/usr/share/dotnet" || true apt-get install --assume-yes build-essential curl
sudo rm -rf "/usr/local/lib/android" || true - uses: dtolnay/rust-toolchain@1.89
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run cargo build with almost all features - name: Run cargo build with almost all features
run: | run: |
cargo build --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)" cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
- name: Run cargo test with almost all features - name: Run cargo test with almost all features
run: | run: |
cargo test --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)" cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
ollama-ubuntu: ollama-ubuntu:
name: Test with Ollama name: Test with Ollama
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
env: env:
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434" MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: Install Ollama - name: Install Ollama
run: | run: |
curl -fsSL https://ollama.com/install.sh | sudo -E sh curl -fsSL https://ollama.com/install.sh | sudo -E sh
@@ -156,21 +115,21 @@ jobs:
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
command: test command: test
args: --locked -p meilisearch --features test-ollama ollama args: --locked --release --all --features test-ollama ollama
test-disabled-tokenization: test-disabled-tokenization:
name: Test disabled tokenization name: Test disabled tokenization
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
container:
image: ubuntu:22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709 - name: Install needed dependencies
run: | run: |
sudo rm -rf "/opt/ghc" || true apt-get update
sudo rm -rf "/usr/share/dotnet" || true apt-get install --assume-yes build-essential curl
sudo rm -rf "/usr/local/lib/android" || true - uses: dtolnay/rust-toolchain@1.89
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run cargo tree without default features and check lindera is not present - name: Run cargo tree without default features and check lindera is not present
run: | run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -181,39 +140,36 @@ jobs:
run: | run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz cargo tree -f '{p} {f}' -e normal | grep lindera -qz
build: # We run tests in debug also, to make sure that the debug_assertions are hit
name: Build in release test-debug:
runs-on: ubuntu-22.04 name: Run tests in debug
runs-on: ubuntu-latest
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709 - name: Install needed dependencies
run: | run: |
sudo rm -rf "/opt/ghc" || true apt-get update && apt-get install -y curl
sudo rm -rf "/usr/share/dotnet" || true apt-get install build-essential -y
sudo rm -rf "/usr/local/lib/android" || true - uses: dtolnay/rust-toolchain@1.89
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0 uses: Swatinem/rust-cache@v2.8.0
- name: Build - name: Run tests in debug
run: cargo build --release --locked --target x86_64-unknown-linux-gnu uses: actions-rs/cargo@v1
with:
command: test
args: --locked --all
clippy: clippy:
name: Run Clippy name: Run Clippy
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
strategy:
matrix:
features: ["", "--features enterprise"]
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709 - uses: dtolnay/rust-toolchain@1.89
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
with: with:
profile: minimal
components: clippy components: clippy
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0 uses: Swatinem/rust-cache@v2.8.0
@@ -221,21 +177,18 @@ jobs:
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
command: clippy command: clippy
args: --all-targets ${{ matrix.features }} -- --deny warnings -D clippy::todo args: --all-targets -- --deny warnings
fmt: fmt:
name: Run Rustfmt name: Run Rustfmt
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709 - uses: dtolnay/rust-toolchain@1.89
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
with: with:
profile: minimal
toolchain: nightly-2024-07-09
override: true
components: rustfmt components: rustfmt
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0 uses: Swatinem/rust-cache@v2.8.0
@@ -246,23 +199,3 @@ jobs:
run: | run: |
echo -ne "\n" > crates/benchmarks/benches/datasets_paths.rs echo -ne "\n" > crates/benchmarks/benches/datasets_paths.rs
cargo fmt --all -- --check cargo fmt --all -- --check
declarative-tests:
name: Run declarative tests
runs-on: ubuntu-22.04-arm
permissions:
contents: read
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run declarative tests
run: |
cargo xtask test workloads/tests/*.json

View File

@@ -18,13 +18,9 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - uses: actions/checkout@v5
-- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
-run: |
-sudo rm -rf "/opt/ghc" || true
-sudo rm -rf "/usr/share/dotnet" || true
-sudo rm -rf "/usr/local/lib/android" || true
-sudo rm -rf "/usr/local/share/boost" || true
-- uses: dtolnay/rust-toolchain@1.91.1
+- uses: dtolnay/rust-toolchain@1.89
+with:
+profile: minimal
 - name: Install sd
 run: cargo install sd
 - name: Update Cargo.toml file

.gitignore (vendored, 3 changed lines)
View File

@@ -29,6 +29,3 @@ crates/meilisearch/db.snapshot
 # Fuzzcheck data for the facet indexing fuzz test
 crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
-# OpenAPI generator
-**/meilisearch-openapi.json

View File

@@ -124,7 +124,6 @@ They are JSON files with the following structure (comments are not actually supp
 {
 // Name of the workload. Must be unique to the workload, as it will be used to group results on the dashboard.
 "name": "hackernews.ndjson_1M,no-threads",
-"type": "bench",
 // Number of consecutive runs of the commands that should be performed.
 // Each run uses a fresh instance of Meilisearch and a fresh database.
 // Each run produces its own report file.

View File

@@ -117,7 +117,7 @@ With swagger:
 With the internal crate:
 ```bash
 cd crates/openapi-generator
-cargo run --release -- --pretty
+cargo run --release -- --pretty --output meilisearch.json
 ```
 ### Logging

Cargo.lock (generated, 2296 changed lines)

File diff suppressed because it is too large.

View File

@@ -23,7 +23,7 @@ members = [
 ]
 [workspace.package]
-version = "1.30.0"
+version = "1.24.0"
 authors = [
 "Quentin de Quelen <quentin@dequelen.me>",
 "Clément Renault <clement@meilisearch.com>",
@@ -50,5 +50,3 @@ opt-level = 3
 opt-level = 3
 [profile.dev.package.roaring]
 opt-level = 3
-[profile.dev.package.gemm-f16]
-opt-level = 3

Cross.toml (new file, 7 lines)
View File

@@ -0,0 +1,7 @@
[build.env]
passthrough = [
"RUST_BACKTRACE",
"CARGO_TERM_COLOR",
"RUSTFLAGS",
"JEMALLOC_SYS_WITH_LG_PAGE"
]

View File

@@ -8,14 +8,16 @@ WORKDIR /
 ARG COMMIT_SHA
 ARG COMMIT_DATE
 ARG GIT_TAG
-ARG EXTRA_ARGS
 ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_DESCRIBE=${GIT_TAG}
 ENV RUSTFLAGS="-C target-feature=-crt-static"
 COPY . .
 RUN set -eux; \
 apkArch="$(apk --print-arch)"; \
-cargo build --release -p meilisearch -p meilitool ${EXTRA_ARGS}
+if [ "$apkArch" = "aarch64" ]; then \
+export JEMALLOC_SYS_WITH_LG_PAGE=16; \
+fi && \
+cargo build --release -p meilisearch -p meilitool
 # Run
 FROM alpine:3.22

View File

@@ -39,7 +39,6 @@
 ## 🖥 Examples
 - [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
-- [**Flickr**](https://flickr.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — Search and explore one hundred million Flickr images with semantic search.
 - [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
 - [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million of songs.
 - [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
@@ -104,7 +103,6 @@ Meilisearch is available in two editions:
 - Includes advanced features such as:
 - Sharding
-- S3-streaming snapshots
 - Governed by a [commercial license](./LICENSE-EE) or the [Business Source License 1.1](https://mariadb.com/bsl11)
 - Not allowed in production without a commercial agreement with Meilisearch.
 - You may use, modify, and distribute the Licensed Work for non-production purposes only, such as testing, development, or evaluation.

View File

@@ -1,326 +0,0 @@
# Declarative tests
Declarative tests ensure that Meilisearch features remain stable across versions.
While we already have unit tests, those are run against **temporary databases** that are created fresh each time and therefore never risk corruption.
Declarative tests instead **simulate the lifetime of a database**: they chain together API commands and binary-change instructions, verifying that database state and API responses remain consistent across versions.
## Basic example
```jsonc
{
"type": "test",
"name": "api-keys",
"binary": { // the first command will run on the binary following this specification.
"source": "release", // get the binary as a release from GitHub
"version": "1.19.0", // version to fetch
"edition": "community" // edition to fetch
},
"commands": []
}
```
This example defines a no-op test (it does nothing).
If the file is saved at `workloads/tests/example.json`, you can run it with:
```bash
cargo xtask test workloads/tests/example.json
```
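To make the example above do something, a first request can be added to the `commands` array; a minimal sketch, assuming the `GET /health` route and the command fields described in the following sections:
```jsonc
{
  "type": "test",
  "name": "api-keys",
  "binary": {
    "source": "release",
    "version": "1.19.0",
    "edition": "community"
  },
  "commands": [
    {
      // Simplest possible command: check that the instance answers.
      "route": "health",
      "method": "GET",
      "expectedStatus": 200
    }
  ]
}
```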
## Commands
Commands represent API requests sent to Meilisearch endpoints during a test.
They are executed sequentially, and their responses can be validated to ensure consistent behavior across upgrades.
```jsonc
{
"route": "keys",
"method": "POST",
"body": {
"inline": {
"actions": [
"search",
"documents.add"
],
"description": "Test API Key",
"expiresAt": null,
"indexes": [ "movies" ]
}
}
}
```
This command issues a `POST /keys` request, creating an API key with permissions to search and add documents in the `movies` index.
### Using assets in commands
To keep tests concise and reusable, you can define **assets** at the root of the workload file.
Assets are external data sources (such as datasets) that are cached between runs, making tests faster and easier to read.
```jsonc
{
"type": "test",
"name": "movies",
"binary": {
"source": "release",
"version": "1.19.0",
"edition": "community"
},
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
}
}
]
}
```
In this example:
- The `movies.json` dataset is defined as an asset, pointing to a remote URL.
- The SHA-256 checksum ensures integrity.
- The `POST /indexes/movies/documents` command uses this asset as the request body.
This makes the test much cleaner than inlining a large dataset directly into the command.
For asset handling, please refer to the [declarative benchmarks documentation](/BENCHMARKS.md#adding-new-assets).
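A later command can then query the documents loaded from the asset; a minimal sketch (the search query itself is illustrative, not taken from an existing workload):
```jsonc
{
  // Search the index that was populated from the `movies.json` asset.
  "route": "indexes/movies/search",
  "method": "POST",
  "body": {
    "inline": { "q": "batman" }
  }
}
```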
### Asserting responses
Commands can specify both the **expected status code** and the **expected response body**.
```jsonc
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
},
"expectedStatus": 202,
"expectedResponse": {
"enqueuedAt": "[timestamp]", // Set to a bracketed string to ignore the value
"indexUid": "movies",
"status": "enqueued",
"taskUid": 1,
"type": "documentAdditionOrUpdate"
},
"synchronous": "WaitForTask"
}
```
Manually writing `expectedResponse` fields can be tedious.
Instead, you can let the test runner populate them automatically:
```bash
# Run the workload to populate expected fields. Only adds the missing ones, doesn't change existing data
cargo xtask test workloads/tests/example.json --add-missing-responses
# OR
# Run the workload to populate expected fields. Updates all fields including existing ones
cargo xtask test workloads/tests/example.json --update-responses
```
This workflow is recommended:
1. Write the test without expected fields.
2. Run it with `--add-missing-responses` to capture the actual responses (see the sketch after this list).
3. Review and commit the generated expectations.
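For instance, a command can start out with only the request description; after a run with `--add-missing-responses`, the expectations are filled in from the actual response. A sketch using the `GET /health` route, where the recorded values shown are assumptions:
```jsonc
// Before: only the request is described.
{
  "route": "health",
  "method": "GET"
}
// After `--add-missing-responses`: status and body recorded from the live instance.
{
  "route": "health",
  "method": "GET",
  "expectedStatus": 200,
  "expectedResponse": {
    "status": "available"
  }
}
```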
## Changing binary
It is possible to insert an instruction to change the current Meilisearch instance from one binary specification to another during a test.
When executed, such an instruction will:
1. Stop the current Meilisearch instance.
2. Fetch the binary specified by the instruction.
3. Restart the server with the specified binary on the same database.
```jsonc
{
"type": "test",
"name": "movies",
"binary": {
"source": "release",
"version": "1.19.0", // start with version v1.19.0
"edition": "community"
},
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"commands": [
// setup some data
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
}
},
// switch binary to v1.24.0
{
"binary": {
"source": "release",
"version": "1.24.0",
"edition": "community"
}
}
]
}
```
### Typical usage
In most cases, a binary change instruction is used to upgrade a database.
- **Set up** some data using commands on an older version.
- **Upgrade** to the latest version.
- **Assert** that the data and API behavior remain correct after the upgrade.
To properly test the dumpless upgrade, one should typically:
1. Open the database without processing the update task: Use a `binary` instruction to switch to the desired version, passing `--experimental-dumpless-upgrade` and `--experimental-max-number-of-batched-tasks=0` as extra CLI arguments.
2. Check that the search, stats, and task queue still work.
3. Open the database and process the update task: Use a `binary` instruction to switch to the desired version, passing `--experimental-dumpless-upgrade` as the extra CLI argument. Use a `health` command to wait for the upgrade task to finish.
4. Check that the indexing, search, stats, and task queue still work.
```jsonc
{
"type": "test",
"name": "movies",
"binary": {
"source": "release",
"version": "1.12.0",
"edition": "community"
},
"commands": [
// 0. Run commands to populate the database
{
// ..
},
// 1. Open the database with new MS without processing the update task
{
"binary": {
"source": "build", // build the binary from the sources in the current git repository
"edition": "community",
"extraCliArgs": [
"--experimental-dumpless-upgrade", // allows to open with a newer MS
"--experimental-max-number-of-batched-tasks=0" // prevent processing of the update task
]
}
},
// 2. Check the search etc.
{
// ..
},
// 3. Open the database with new MS and processing the update task
{
"binary": {
"source": "build", // build the binary from the sources in the current git repository
"edition": "community",
"extraCliArgs": [
"--experimental-dumpless-upgrade" // allows to open with a newer MS
// no `--experimental-max-number-of-batched-tasks=0`
]
}
},
// 4. Check the indexing, search, etc.
{
// ..
}
]
}
```
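As an illustration, the `// ..` check placeholders could be filled with commands like the following: a `health` call that waits for the upgrade task right after the step 3 binary switch, followed by a plain search. The query and status below are examples only, and the exact waiting semantics may differ in your setup:
```jsonc
// wait for the upgrade task to finish (right after the step 3 binary switch)
{
  "route": "health",
  "method": "GET",
  "synchronous": "WaitForTask"
},
// check that search still works
{
  "route": "indexes/movies/search",
  "method": "POST",
  "body": {
    "inline": { "q": "shazam" }
  },
  "expectedStatus": 200
}
```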
This ensures backward compatibility: databases created with older Meilisearch versions should remain functional and consistent after an upgrade.
## Variables
Sometimes a command needs to use a value returned by a **previous response**.
These values can be captured and reused using the `register` field.
```jsonc
{
"route": "keys",
"method": "POST",
"body": {
"inline": {
"actions": [
"search",
"documents.add"
],
"description": "Test API Key",
"expiresAt": null,
"indexes": [ "movies" ]
}
},
"expectedResponse": {
"key": "c6f64630bad2996b1f675007c8800168e14adf5d6a7bb1a400a6d2b158050eaf",
// ...
},
"register": {
"key": "/key"
},
"synchronous": "WaitForResponse"
}
```
The `register` field captures the value at the JSON path `/key` from the response.
Paths follow the **JSON Pointer (RFC 6901)** format.
Registered variables are available for all subsequent commands.
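For instance, building on the document addition command used earlier, the uid of the enqueued task can be captured under the (arbitrary) name `task_id`:
```jsonc
{
  "route": "indexes/movies/documents",
  "method": "POST",
  "body": {
    "asset": "movies.json"
  },
  "register": {
    "task_id": "/taskUid" // JSON Pointer into the response body
  },
  "synchronous": "WaitForTask"
}
```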
Registered variables can be referenced by wrapping their name in double curly braces:
In the route/path:
```jsonc
{
"route": "tasks/{{ task_id }}",
"method": "GET"
}
```
In the request body:
```jsonc
{
"route": "indexes/movies/documents",
"method": "PATCH",
"body": {
"inline": {
"id": "{{ document_id }}",
"overview": "Shazam turns evil and the world is in danger.",
}
}
}
```
Alternatively, they can be referenced by name (**without curly braces**) to supply an API key:
```jsonc
{
"route": "indexes/movies/documents",
"method": "POST",
"body": { /* ... */ },
"apiKeyVariable": "key" // The **content** of the key variable will be used as an API key
}
```


@@ -11,27 +11,27 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.100" anyhow = "1.0.98"
bumpalo = "3.19.0" bumpalo = "3.18.1"
csv = "1.4.0" csv = "1.3.1"
memmap2 = "0.9.9" memmap2 = "0.9.7"
milli = { path = "../milli" } milli = { path = "../milli" }
mimalloc = { version = "0.1.48", default-features = false } mimalloc = { version = "0.1.47", default-features = false }
serde_json = { version = "1.0.145", features = ["preserve_order"] } serde_json = { version = "1.0.140", features = ["preserve_order"] }
tempfile = "3.23.0" tempfile = "3.20.0"
[dev-dependencies] [dev-dependencies]
criterion = { version = "0.7.0", features = ["html_reports"] } criterion = { version = "0.6.0", features = ["html_reports"] }
rand = "0.8.5" rand = "0.8.5"
rand_chacha = "0.3.1" rand_chacha = "0.3.1"
roaring = "0.10.12" roaring = "0.10.12"
[build-dependencies] [build-dependencies]
anyhow = "1.0.100" anyhow = "1.0.98"
bytes = "1.11.0" bytes = "1.10.1"
convert_case = "0.9.0" convert_case = "0.8.0"
flate2 = "1.1.5" flate2 = "1.1.2"
reqwest = { version = "0.12.24", features = ["blocking", "rustls-tls"], default-features = false } reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
[features] [features]
default = ["milli/all-tokenizations"] default = ["milli/all-tokenizations"]


@@ -10,7 +10,7 @@ use milli::documents::PrimaryKey;
use milli::heed::{EnvOpenOptions, RwTxn}; use milli::heed::{EnvOpenOptions, RwTxn};
use milli::progress::Progress; use milli::progress::Progress;
use milli::update::new::indexer; use milli::update::new::indexer;
use milli::update::{IndexerConfig, MissingDocumentPolicy, Settings}; use milli::update::{IndexerConfig, Settings};
use milli::vector::RuntimeEmbedders; use milli::vector::RuntimeEmbedders;
use milli::{FilterableAttributesRule, Index}; use milli::{FilterableAttributesRule, Index};
use rand::seq::SliceRandom; use rand::seq::SliceRandom;
@@ -21,10 +21,6 @@ use roaring::RoaringBitmap;
#[global_allocator] #[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn no_cancel() -> bool {
false
}
const BENCHMARK_ITERATION: usize = 10; const BENCHMARK_ITERATION: usize = 10;
fn setup_dir(path: impl AsRef<Path>) { fn setup_dir(path: impl AsRef<Path>) {
@@ -69,7 +65,7 @@ fn setup_settings<'t>(
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect(); let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
builder.set_sortable_fields(sortable_fields); builder.set_sortable_fields(sortable_fields);
builder.execute(&no_cancel, &Progress::default(), Default::default()).unwrap(); builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
} }
fn setup_index_with_settings( fn setup_index_with_settings(
@@ -146,7 +142,7 @@ fn indexing_songs_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -156,7 +152,7 @@ fn indexing_songs_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -172,7 +168,7 @@ fn indexing_songs_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -214,7 +210,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -224,7 +220,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -240,7 +236,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -260,7 +256,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -270,7 +266,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -286,7 +282,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -330,7 +326,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -340,7 +336,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -356,7 +352,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -408,7 +404,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -418,7 +414,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -434,7 +430,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -454,7 +450,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -464,7 +460,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -480,7 +476,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -496,7 +492,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -506,7 +502,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -522,7 +518,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -565,7 +561,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -575,7 +571,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -591,7 +587,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -633,7 +629,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -643,7 +639,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -659,7 +655,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -701,7 +697,7 @@ fn indexing_wiki(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -711,7 +707,7 @@ fn indexing_wiki(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -727,7 +723,7 @@ fn indexing_wiki(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -768,7 +764,7 @@ fn reindexing_wiki(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -778,7 +774,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -794,7 +790,7 @@ fn reindexing_wiki(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -814,7 +810,7 @@ fn reindexing_wiki(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -824,7 +820,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -840,7 +836,7 @@ fn reindexing_wiki(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -883,7 +879,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -893,7 +889,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -909,7 +905,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -961,7 +957,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = let documents =
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv"); utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -971,7 +967,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -987,7 +983,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1008,7 +1004,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = let documents =
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv"); utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1018,7 +1014,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1034,7 +1030,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1051,7 +1047,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = let documents =
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv"); utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1061,7 +1057,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1077,7 +1073,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1119,7 +1115,7 @@ fn indexing_movies_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES, "json"); let documents = utils::documents_from(datasets_paths::MOVIES, "json");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1129,7 +1125,7 @@ fn indexing_movies_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1145,7 +1141,7 @@ fn indexing_movies_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1186,7 +1182,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES, "json"); let documents = utils::documents_from(datasets_paths::MOVIES, "json");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1196,7 +1192,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1212,7 +1208,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1232,7 +1228,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES, "json"); let documents = utils::documents_from(datasets_paths::MOVIES, "json");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1242,7 +1238,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1258,7 +1254,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1301,7 +1297,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES, "json"); let documents = utils::documents_from(datasets_paths::MOVIES, "json");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1311,7 +1307,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1327,7 +1323,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1376,7 +1372,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
Some(primary_key), Some(primary_key),
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1416,7 +1412,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json"); let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1426,7 +1422,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1442,7 +1438,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1462,7 +1458,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json"); let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1472,7 +1468,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1488,7 +1484,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1504,7 +1500,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json"); let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1514,7 +1510,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1530,7 +1526,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1595,7 +1591,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1605,7 +1601,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1621,7 +1617,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1687,7 +1683,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1697,7 +1693,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1713,7 +1709,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1771,7 +1767,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1781,7 +1777,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1797,7 +1793,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1839,7 +1835,7 @@ fn indexing_geo(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1849,7 +1845,7 @@ fn indexing_geo(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1865,7 +1861,7 @@ fn indexing_geo(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1906,7 +1902,7 @@ fn reindexing_geo(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1916,7 +1912,7 @@ fn reindexing_geo(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1932,7 +1928,7 @@ fn reindexing_geo(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -1952,7 +1948,7 @@ fn reindexing_geo(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -1962,7 +1958,7 @@ fn reindexing_geo(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -1978,7 +1974,7 @@ fn reindexing_geo(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )
@@ -2021,7 +2017,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer
@@ -2031,7 +2027,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
&rtxn, &rtxn,
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&no_cancel, &|| false,
Progress::default(), Progress::default(),
None, None,
) )
@@ -2047,7 +2043,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
primary_key, primary_key,
&document_changes, &document_changes,
RuntimeEmbedders::default(), RuntimeEmbedders::default(),
&no_cancel, &|| false,
&Progress::default(), &Progress::default(),
&Default::default(), &Default::default(),
) )


@@ -13,7 +13,7 @@ use milli::documents::sort::recursive_sort;
use milli::heed::EnvOpenOptions; use milli::heed::EnvOpenOptions;
use milli::progress::Progress; use milli::progress::Progress;
use milli::update::new::indexer; use milli::update::new::indexer;
use milli::update::{IndexerConfig, MissingDocumentPolicy, Settings}; use milli::update::{IndexerConfig, Settings};
use milli::vector::RuntimeEmbedders; use milli::vector::RuntimeEmbedders;
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy}; use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
use serde_json::Value; use serde_json::Value;
@@ -111,7 +111,7 @@ pub fn base_setup(conf: &Conf) -> Index {
let documents = documents_from(conf.dataset, conf.dataset_format); let documents = documents_from(conf.dataset, conf.dataset_format);
let mut indexer = indexer::DocumentOperation::new(); let mut indexer = indexer::DocumentOperation::new();
indexer.replace_documents(&documents, MissingDocumentPolicy::default()).unwrap(); indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer let (document_changes, _operation_stats, primary_key) = indexer


@@ -11,8 +11,8 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
time = { version = "0.3.44", features = ["parsing"] } time = { version = "0.3.41", features = ["parsing"] }
[build-dependencies] [build-dependencies]
anyhow = "1.0.100" anyhow = "1.0.98"
vergen-gitcl = "1.0.8" vergen-git2 = "1.0.7"


@@ -15,7 +15,7 @@ fn emit_git_variables() -> anyhow::Result<()> {
// Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
// in the corresponding GitHub workflow (publish_docker.yml). // in the corresponding GitHub workflow (publish_docker.yml).
// This is due to the Dockerfile building the binary outside of the git directory. // This is due to the Dockerfile building the binary outside of the git directory.
let mut builder = vergen_gitcl::GitclBuilder::default(); let mut builder = vergen_git2::Git2Builder::default();
builder.branch(true); builder.branch(true);
builder.commit_timestamp(true); builder.commit_timestamp(true);
@@ -25,5 +25,5 @@ fn emit_git_variables() -> anyhow::Result<()> {
let git2 = builder.build()?; let git2 = builder.build()?;
vergen_gitcl::Emitter::default().fail_on_error().add_instructions(&git2)?.emit() vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
} }


@@ -1,6 +0,0 @@
use build_info::BuildInfo;
fn main() {
let info = BuildInfo::from_build();
dbg!(info);
}


@@ -11,27 +11,24 @@ readme.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.100" anyhow = "1.0.98"
flate2 = "1.1.5" flate2 = "1.1.2"
http = "1.3.1" http = "1.3.1"
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.21.3" once_cell = "1.21.3"
regex = "1.12.2" regex = "1.11.1"
roaring = { version = "0.10.12", features = ["serde"] } roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.228", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] } serde_json = { version = "1.0.140", features = ["preserve_order"] }
tar = "0.4.44" tar = "0.4.44"
tempfile = "3.23.0" tempfile = "3.20.0"
thiserror = "2.0.17" thiserror = "2.0.12"
time = { version = "0.3.44", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.41" tracing = "0.1.41"
uuid = { version = "1.18.1", features = ["serde", "v4"] } uuid = { version = "1.17.0", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
big_s = "1.0.2" big_s = "1.0.2"
maplit = "1.0.2" maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
[features]
enterprise = ["meilisearch-types/enterprise"]


@@ -9,9 +9,8 @@ use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::Key; use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked; use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::network::{DbTaskNetwork, NetworkTopologyChange};
use meilisearch_types::tasks::{ use meilisearch_types::tasks::{
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, TaskNetwork,
}; };
use meilisearch_types::InstanceUid; use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@@ -96,9 +95,7 @@ pub struct TaskDump {
)] )]
pub finished_at: Option<OffsetDateTime>, pub finished_at: Option<OffsetDateTime>,
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<DbTaskNetwork>, pub network: Option<TaskNetwork>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub custom_metadata: Option<String>,
} }
// A `Kind` specific version made for the dump. If modified you may break the dump. // A `Kind` specific version made for the dump. If modified you may break the dump.
@@ -164,7 +161,6 @@ pub enum KindDump {
IndexCompaction { IndexCompaction {
index_uid: String, index_uid: String,
}, },
NetworkTopologyChange(NetworkTopologyChange),
} }
impl From<Task> for TaskDump { impl From<Task> for TaskDump {
@@ -182,7 +178,6 @@ impl From<Task> for TaskDump {
started_at: task.started_at, started_at: task.started_at,
finished_at: task.finished_at, finished_at: task.finished_at,
network: task.network, network: task.network,
custom_metadata: task.custom_metadata,
} }
} }
} }
@@ -251,9 +246,6 @@ impl From<KindWithContent> for KindDump {
KindWithContent::IndexCompaction { index_uid } => { KindWithContent::IndexCompaction { index_uid } => {
KindDump::IndexCompaction { index_uid } KindDump::IndexCompaction { index_uid }
} }
KindWithContent::NetworkTopologyChange(network_topology_change) => {
KindDump::NetworkTopologyChange(network_topology_change)
}
} }
} }
} }
@@ -267,13 +259,13 @@ pub(crate) mod test {
use big_s::S; use big_s::S;
use maplit::{btreemap, btreeset}; use maplit::{btreemap, btreeset};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats}; use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::enterprise_edition::network::{Network, Remote};
use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::features::RuntimeTogglableFeatures; use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key}; use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self, FilterableAttributesRule}; use meilisearch_types::milli::{self, FilterableAttributesRule};
use meilisearch_types::network::{Network, Remote};
use meilisearch_types::settings::{Checked, FacetingSettings, Settings}; use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::task_view::DetailsView; use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{BatchStopReason, Details, Kind, Status}; use meilisearch_types::tasks::{BatchStopReason, Details, Kind, Status};
@@ -404,7 +396,6 @@ pub(crate) mod test {
started_at: Some(datetime!(2022-11-20 0:00 UTC)), started_at: Some(datetime!(2022-11-20 0:00 UTC)),
finished_at: Some(datetime!(2022-11-21 0:00 UTC)), finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
network: None, network: None,
custom_metadata: None,
}, },
None, None,
), ),
@@ -430,7 +421,6 @@ pub(crate) mod test {
started_at: None, started_at: None,
finished_at: None, finished_at: None,
network: None, network: None,
custom_metadata: None,
}, },
Some(vec![ Some(vec![
json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(), json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
@@ -451,7 +441,6 @@ pub(crate) mod test {
started_at: None, started_at: None,
finished_at: None, finished_at: None,
network: None, network: None,
custom_metadata: None,
}, },
None, None,
), ),
@@ -565,8 +554,7 @@ pub(crate) mod test {
Network { Network {
local: Some("myself".to_string()), local: Some("myself".to_string()),
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }}, remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
leader: None, sharding: false,
version: Default::default(),
} }
} }
@@ -620,10 +608,7 @@ pub(crate) mod test {
assert_eq!(dump.features().unwrap().unwrap(), expected); assert_eq!(dump.features().unwrap().unwrap(), expected);
// ==== checking the network // ==== checking the network
let mut expected = create_test_network(); let expected = create_test_network();
// from v1.29, we drop `leader` and `local` on import
expected.leader = None;
expected.local = None;
assert_eq!(&expected, dump.network().unwrap().unwrap()); assert_eq!(&expected, dump.network().unwrap().unwrap());
} }
} }


@@ -164,7 +164,6 @@ impl CompatV5ToV6 {
started_at: task_view.started_at, started_at: task_view.started_at,
finished_at: task_view.finished_at, finished_at: task_view.finished_at,
network: None, network: None,
custom_metadata: None,
}; };
(task, content_file) (task, content_file)


@@ -434,11 +434,7 @@ pub(crate) mod test {
// network // network
let network = dump.network().unwrap().unwrap(); let network = dump.network().unwrap().unwrap();
insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
// since v1.29 we are dropping `local` and `leader` on import
insta::assert_snapshot!(network.local.is_none(), @"true");
insta::assert_snapshot!(network.leader.is_none(), @"true");
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700"); insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true"); insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701"); insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");


@@ -107,14 +107,19 @@ impl Settings<Unchecked> {
} }
} }
#[derive(Default, Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Setting<T> { pub enum Setting<T> {
Set(T), Set(T),
Reset, Reset,
#[default]
NotSet, NotSet,
} }
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> { impl<T> Setting<T> {
pub const fn is_not_set(&self) -> bool { pub const fn is_not_set(&self) -> bool {
matches!(self, Self::NotSet) matches!(self, Self::NotSet)


@@ -161,14 +161,19 @@ pub struct Facets {
pub min_level_size: Option<NonZeroUsize>, pub min_level_size: Option<NonZeroUsize>,
} }
#[derive(Default, Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Setting<T> { pub enum Setting<T> {
Set(T), Set(T),
Reset, Reset,
#[default]
NotSet, NotSet,
} }
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> { impl<T> Setting<T> {
pub fn map<U, F>(self, f: F) -> Setting<U> pub fn map<U, F>(self, f: F) -> Setting<U>
where where


@@ -1,7 +1,9 @@
use std::fmt::{self, Display, Formatter}; use std::fmt::{self, Display, Formatter};
use std::marker::PhantomData;
use std::str::FromStr; use std::str::FromStr;
use serde::Deserialize; use serde::de::Visitor;
use serde::{Deserialize, Deserializer};
use uuid::Uuid; use uuid::Uuid;
use super::settings::{Settings, Unchecked}; use super::settings::{Settings, Unchecked};
@@ -80,3 +82,59 @@ impl Display for IndexUidFormatError {
} }
impl std::error::Error for IndexUidFormatError {} impl std::error::Error for IndexUidFormatError {}
/// A type that tries to match either a star (*) or
/// any other thing that implements `FromStr`.
#[derive(Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum StarOr<T> {
Star,
Other(T),
}
impl<'de, T, E> Deserialize<'de> for StarOr<T>
where
T: FromStr<Err = E>,
E: Display,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
/// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag.
/// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to
/// deserialize everything as a `StarOr::Other`, including "*".
/// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is
/// not supported on untagged enums.
struct StarOrVisitor<T>(PhantomData<T>);
impl<T, FE> Visitor<'_> for StarOrVisitor<T>
where
T: FromStr<Err = FE>,
FE: Display,
{
type Value = StarOr<T>;
fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
formatter.write_str("a string")
}
fn visit_str<SE>(self, v: &str) -> Result<Self::Value, SE>
where
SE: serde::de::Error,
{
match v {
"*" => Ok(StarOr::Star),
v => {
let other = FromStr::from_str(v).map_err(|e: T::Err| {
SE::custom(format!("Invalid `other` value: {}", e))
})?;
Ok(StarOr::Other(other))
}
}
}
}
deserializer.deserialize_str(StarOrVisitor(PhantomData))
}
}
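Not part of the diff: a small usage sketch of the `StarOr<T>` deserializer added above, assuming `serde_json` is available and using `u32` as a stand-in for any `FromStr` target (an index uid pattern, for example).

```rust
// "*" deserializes to Star; anything else goes through FromStr into Other.
fn demo() -> Result<(), serde_json::Error> {
    let star: StarOr<u32> = serde_json::from_str(r#""*""#)?;
    assert!(matches!(star, StarOr::Star));

    let other: StarOr<u32> = serde_json::from_str(r#""42""#)?;
    assert!(matches!(other, StarOr::Other(42)));
    Ok(())
}
```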

View File

@@ -192,14 +192,19 @@ pub struct Facets {
pub min_level_size: Option<NonZeroUsize>, pub min_level_size: Option<NonZeroUsize>,
} }
#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)] #[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum Setting<T> { pub enum Setting<T> {
Set(T), Set(T),
Reset, Reset,
#[default]
NotSet, NotSet,
} }
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> { impl<T> Setting<T> {
pub fn set(self) -> Option<T> { pub fn set(self) -> Option<T> {
match self { match self {

View File

@@ -47,15 +47,20 @@ pub struct Settings<T> {
pub _kind: PhantomData<T>, pub _kind: PhantomData<T>,
} }
#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)] #[derive(Debug, Clone, PartialEq, Eq, Copy)]
#[cfg_attr(test, derive(serde::Serialize))] #[cfg_attr(test, derive(serde::Serialize))]
pub enum Setting<T> { pub enum Setting<T> {
Set(T), Set(T),
Reset, Reset,
#[default]
NotSet, NotSet,
} }
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> { impl<T> Setting<T> {
pub fn set(self) -> Option<T> { pub fn set(self) -> Option<T> {
match self { match self {

View File

@@ -322,7 +322,7 @@ impl From<Task> for TaskView {
_ => None, _ => None,
}); });
let duration = finished_at.zip(started_at).map(|(tf, ts)| tf - ts); let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts));
Self { Self {
uid: id, uid: id,

View File

@@ -24,7 +24,7 @@ pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key; pub type Key = meilisearch_types::keys::Key;
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings; pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures; pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::network::Network; pub type Network = meilisearch_types::enterprise_edition::network::Network;
pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView; pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView;
// ===== Other types to clarify the code of the compat module // ===== Other types to clarify the code of the compat module
@@ -95,8 +95,7 @@ impl V6Reader {
Err(e) => return Err(e.into()), Err(e) => return Err(e.into()),
}; };
let mut network: Option<meilisearch_types::network::Network> = let network = match fs::read(dump.path().join("network.json")) {
match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(serde_json::from_reader(&*network_file)?), Ok(network_file) => Some(serde_json::from_reader(&*network_file)?),
Err(error) => match error.kind() { Err(error) => match error.kind() {
// Allows the file to be missing, this will only result in all experimental features disabled. // Allows the file to be missing, this will only result in all experimental features disabled.
@@ -108,14 +107,6 @@ impl V6Reader {
}, },
}; };
if let Some(network) = &mut network {
// as dumps are typically imported on a different machine than the emitter (otherwise dumpless upgrade would be used),
// we decide to remove the self remote to avoid alias issues
network.local = None;
// for the same reason we disable automatic sharding
network.leader = None;
}
let webhooks = match fs::read(dump.path().join("webhooks.json")) { let webhooks = match fs::read(dump.path().join("webhooks.json")) {
Ok(webhooks_file) => Some(serde_json::from_reader(&*webhooks_file)?), Ok(webhooks_file) => Some(serde_json::from_reader(&*webhooks_file)?),
Err(error) => match error.kind() { Err(error) => match error.kind() {

View File

@@ -5,9 +5,9 @@ use std::path::PathBuf;
use flate2::write::GzEncoder; use flate2::write::GzEncoder;
use flate2::Compression; use flate2::Compression;
use meilisearch_types::batches::Batch; use meilisearch_types::batches::Batch;
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::features::{ChatCompletionSettings, RuntimeTogglableFeatures}; use meilisearch_types::features::{ChatCompletionSettings, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key; use meilisearch_types::keys::Key;
use meilisearch_types::network::Network;
use meilisearch_types::settings::{Checked, Settings}; use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::webhooks::WebhooksDumpView; use meilisearch_types::webhooks::WebhooksDumpView;
use serde_json::{Map, Value}; use serde_json::{Map, Value};

View File

@@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
tempfile = "3.23.0" tempfile = "3.20.0"
thiserror = "2.0.17" thiserror = "2.0.12"
tracing = "0.1.41" tracing = "0.1.41"
uuid = { version = "1.18.1", features = ["serde", "v4"] } uuid = { version = "1.17.0", features = ["serde", "v4"] }

View File

@@ -60,7 +60,7 @@ impl FileStore {
/// Returns the file corresponding to the requested uuid. /// Returns the file corresponding to the requested uuid.
pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> { pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> {
let path = self.update_path(uuid); let path = self.get_update_path(uuid);
let file = match StdFile::open(path) { let file = match StdFile::open(path) {
Ok(file) => file, Ok(file) => file,
Err(e) => { Err(e) => {
@@ -72,7 +72,7 @@ impl FileStore {
} }
/// Returns the path that corresponds to this uuid; the path may not exist. /// Returns the path that corresponds to this uuid; the path may not exist.
pub fn update_path(&self, uuid: Uuid) -> PathBuf { pub fn get_update_path(&self, uuid: Uuid) -> PathBuf {
self.path.join(uuid.to_string()) self.path.join(uuid.to_string())
} }

View File

@@ -16,7 +16,7 @@ license.workspace = true
serde_json = "1.0" serde_json = "1.0"
[dev-dependencies] [dev-dependencies]
criterion = { version = "0.7.0", features = ["html_reports"] } criterion = { version = "0.6.0", features = ["html_reports"] }
[[bench]] [[bench]]
name = "benchmarks" name = "benchmarks"

View File

@@ -11,12 +11,12 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
arbitrary = { version = "1.4.2", features = ["derive"] } arbitrary = { version = "1.4.1", features = ["derive"] }
bumpalo = "3.19.0" bumpalo = "3.18.1"
clap = { version = "4.5.52", features = ["derive"] } clap = { version = "4.5.40", features = ["derive"] }
either = "1.15.0" either = "1.15.0"
fastrand = "2.3.0" fastrand = "2.3.0"
milli = { path = "../milli" } milli = { path = "../milli" }
serde = { version = "1.0.228", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] } serde_json = { version = "1.0.140", features = ["preserve_order"] }
tempfile = "3.23.0" tempfile = "3.20.0"

View File

@@ -12,7 +12,7 @@ use milli::documents::mmap_from_objects;
use milli::heed::EnvOpenOptions; use milli::heed::EnvOpenOptions;
use milli::progress::Progress; use milli::progress::Progress;
use milli::update::new::indexer; use milli::update::new::indexer;
use milli::update::{IndexerConfig, MissingDocumentPolicy}; use milli::update::IndexerConfig;
use milli::vector::RuntimeEmbedders; use milli::vector::RuntimeEmbedders;
use milli::Index; use milli::Index;
use serde_json::Value; use serde_json::Value;
@@ -113,12 +113,9 @@ fn main() {
for op in &operations { for op in &operations {
match op { match op {
Either::Left(documents) => indexer Either::Left(documents) => {
.replace_documents( indexer.replace_documents(documents).unwrap()
documents, }
MissingDocumentPolicy::default(),
)
.unwrap(),
Either::Right(ids) => indexer.delete_documents(ids), Either::Right(ids) => indexer.delete_documents(ids),
} }
} }

View File

@@ -11,34 +11,31 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.100" anyhow = "1.0.98"
bincode = "1.3.3" bincode = "1.3.3"
byte-unit = "5.1.6" byte-unit = "5.1.6"
bytes = "1.11.0" bumpalo = "3.18.1"
bumpalo = "3.19.0"
bumparaw-collections = "0.1.4" bumparaw-collections = "0.1.4"
convert_case = "0.9.0" convert_case = "0.8.0"
csv = "1.4.0" csv = "1.3.1"
derive_builder = "0.20.2" derive_builder = "0.20.2"
dump = { path = "../dump" } dump = { path = "../dump" }
enum-iterator = "2.3.0" enum-iterator = "2.1.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.1.5" flate2 = "1.1.2"
hashbrown = "0.15.5" indexmap = "2.9.0"
indexmap = "2.12.0"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.9" memmap2 = "0.9.7"
page_size = "0.6.0" page_size = "0.6.0"
rayon = "1.11.0" rayon = "1.10.0"
roaring = { version = "0.10.12", features = ["serde"] } roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.228", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.145", features = ["preserve_order"] } serde_json = { version = "1.0.140", features = ["preserve_order"] }
tar = "0.4.44"
synchronoise = "1.0.1" synchronoise = "1.0.1"
tempfile = "3.23.0" tempfile = "3.20.0"
thiserror = "2.0.17" thiserror = "2.0.12"
time = { version = "0.3.44", features = [ time = { version = "0.3.41", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
@@ -46,15 +43,8 @@ time = { version = "0.3.44", features = [
] } ] }
tracing = "0.1.41" tracing = "0.1.41"
ureq = "2.12.1" ureq = "2.12.1"
uuid = { version = "1.18.1", features = ["serde", "v4"] } uuid = { version = "1.17.0", features = ["serde", "v4"] }
backoff = { version = "0.4.0", features = ["futures", "tokio"] } backoff = "0.4.0"
reqwest = { version = "0.12.24", features = [
"rustls-tls",
"http2",
], default-features = false }
rusty-s3 = "0.8.1"
tokio = { version = "1.48.0", features = ["full"] }
urlencoding = "2.1.3"
[dev-dependencies] [dev-dependencies]
big_s = "1.0.2" big_s = "1.0.2"
@@ -63,6 +53,3 @@ crossbeam-channel = "0.5.15"
insta = { version = "=1.39.0", features = ["json", "redactions"] } insta = { version = "=1.39.0", features = ["json", "redactions"] }
maplit = "1.0.2" maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
[features]
enterprise = ["meilisearch-types/enterprise"]

View File

@@ -3,17 +3,17 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::io; use std::io;
use crate::{utils, Error, IndexScheduler, Result};
use dump::{KindDump, TaskDump, UpdateFile}; use dump::{KindDump, TaskDump, UpdateFile};
use meilisearch_types::batches::{Batch, BatchId}; use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::RwTxn; use meilisearch_types::heed::RwTxn;
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli; use meilisearch_types::milli;
use meilisearch_types::milli::update::MissingDocumentPolicy;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use uuid::Uuid; use uuid::Uuid;
use crate::{utils, Error, IndexScheduler, Result};
pub struct Dump<'a> { pub struct Dump<'a> {
index_scheduler: &'a IndexScheduler, index_scheduler: &'a IndexScheduler,
wtxn: RwTxn<'a>, wtxn: RwTxn<'a>,
@@ -150,7 +150,6 @@ impl<'a> Dump<'a> {
details: task.details, details: task.details,
status: task.status, status: task.status,
network: task.network, network: task.network,
custom_metadata: task.custom_metadata,
kind: match task.kind { kind: match task.kind {
KindDump::DocumentImport { KindDump::DocumentImport {
primary_key, primary_key,
@@ -164,7 +163,6 @@ impl<'a> Dump<'a> {
content_file: content_uuid.ok_or(Error::CorruptedDump)?, content_file: content_uuid.ok_or(Error::CorruptedDump)?,
documents_count, documents_count,
allow_index_creation, allow_index_creation,
on_missing_document: MissingDocumentPolicy::default(),
}, },
KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion { KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
documents_ids, documents_ids,
@@ -239,9 +237,6 @@ impl<'a> Dump<'a> {
KindDump::IndexCompaction { index_uid } => { KindDump::IndexCompaction { index_uid } => {
KindWithContent::IndexCompaction { index_uid } KindWithContent::IndexCompaction { index_uid }
} }
KindDump::NetworkTopologyChange(network_topology_change) => {
KindWithContent::NetworkTopologyChange(network_topology_change)
}
}, },
}; };

View File

@@ -3,13 +3,9 @@ use std::fmt::Display;
use meilisearch_types::batches::BatchId; use meilisearch_types::batches::BatchId;
use meilisearch_types::error::{Code, ErrorCode}; use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::milli::index::RollbackOutcome; use meilisearch_types::milli::index::RollbackOutcome;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::tasks::network::ReceiveTaskError;
use meilisearch_types::tasks::{Kind, Status}; use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{heed, milli}; use meilisearch_types::{heed, milli};
use reqwest::StatusCode;
use thiserror::Error; use thiserror::Error;
use uuid::Uuid;
use crate::TaskId; use crate::TaskId;
@@ -131,14 +127,6 @@ pub enum Error {
#[error("Aborted task")] #[error("Aborted task")]
AbortedTask, AbortedTask,
#[error("S3 error: status: {status}, body: {body}")]
S3Error { status: StatusCode, body: String },
#[error("S3 HTTP error: {0}")]
S3HttpError(reqwest::Error),
#[error("S3 XML error: {0}")]
S3XmlError(Box<dyn std::error::Error + Send + Sync>),
#[error("S3 bucket error: {0}")]
S3BucketError(rusty_s3::BucketError),
#[error(transparent)] #[error(transparent)]
Dump(#[from] dump::Error), Dump(#[from] dump::Error),
#[error(transparent)] #[error(transparent)]
@@ -194,17 +182,6 @@ pub enum Error {
#[error(transparent)] #[error(transparent)]
HeedTransaction(heed::Error), HeedTransaction(heed::Error),
#[error("No network topology change task is currently enqueued or processing")]
ImportTaskWithoutNetworkTask,
#[error("The network task version (`{network_task}`) does not match the import task version (`{import_task}`)")]
NetworkVersionMismatch { network_task: Uuid, import_task: Uuid },
#[error("The import task emanates from an unknown remote `{0}`")]
ImportTaskUnknownRemote(String),
#[error("The import task with key `{0}` was already received")]
ImportTaskAlreadyReceived(DocumentId),
#[error("{action} requires the Enterprise Edition")]
RequiresEnterpriseEdition { action: &'static str },
#[cfg(test)] #[cfg(test)]
#[error("Planned failure for tests.")] #[error("Planned failure for tests.")]
PlannedFailure, PlannedFailure,
@@ -249,10 +226,6 @@ impl Error {
| Error::TaskCancelationWithEmptyQuery | Error::TaskCancelationWithEmptyQuery
| Error::FromRemoteWhenExporting { .. } | Error::FromRemoteWhenExporting { .. }
| Error::AbortedTask | Error::AbortedTask
| Error::S3Error { .. }
| Error::S3HttpError(_)
| Error::S3XmlError(_)
| Error::S3BucketError(_)
| Error::Dump(_) | Error::Dump(_)
| Error::Heed(_) | Error::Heed(_)
| Error::Milli { .. } | Error::Milli { .. }
@@ -262,11 +235,6 @@ impl Error {
| Error::Persist(_) | Error::Persist(_)
| Error::FeatureNotEnabled(_) | Error::FeatureNotEnabled(_)
| Error::Export(_) | Error::Export(_)
| Error::ImportTaskWithoutNetworkTask
| Error::NetworkVersionMismatch { .. }
| Error::ImportTaskAlreadyReceived(_)
| Error::ImportTaskUnknownRemote(_)
| Error::RequiresEnterpriseEdition { .. }
| Error::Anyhow(_) => true, | Error::Anyhow(_) => true,
Error::CreateBatch(_) Error::CreateBatch(_)
| Error::CorruptedTaskQueue | Error::CorruptedTaskQueue
@@ -325,19 +293,8 @@ impl ErrorCode for Error {
Error::BatchNotFound(_) => Code::BatchNotFound, Error::BatchNotFound(_) => Code::BatchNotFound,
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters, Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters, Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
// TODO: not sure of the Code to use
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice, Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::ImportTaskWithoutNetworkTask => Code::ImportTaskWithoutNetworkTask,
Error::NetworkVersionMismatch { .. } => Code::NetworkVersionMismatch,
Error::ImportTaskAlreadyReceived(_) => Code::ImportTaskAlreadyReceived,
Error::ImportTaskUnknownRemote(_) => Code::ImportTaskUnknownRemote,
Error::RequiresEnterpriseEdition { .. } => Code::RequiresEnterpriseEdition,
Error::S3Error { status, .. } if status.is_client_error() => {
Code::InvalidS3SnapshotRequest
}
Error::S3Error { .. } => Code::S3SnapshotServerError,
Error::S3HttpError(_) => Code::S3SnapshotServerError,
Error::S3XmlError(_) => Code::S3SnapshotServerError,
Error::S3BucketError(_) => Code::InvalidS3SnapshotParameters,
Error::Dump(e) => e.error_code(), Error::Dump(e) => e.error_code(),
Error::Milli { error, .. } => error.error_code(), Error::Milli { error, .. } => error.error_code(),
Error::ProcessBatchPanicked(_) => Code::Internal, Error::ProcessBatchPanicked(_) => Code::Internal,
@@ -369,12 +326,3 @@ impl ErrorCode for Error {
} }
} }
} }
impl From<ReceiveTaskError> for Error {
fn from(value: ReceiveTaskError) -> Self {
match value {
ReceiveTaskError::UnknownRemote(unknown) => Error::ImportTaskUnknownRemote(unknown),
ReceiveTaskError::DuplicateTask(dup) => Error::ImportTaskAlreadyReceived(dup),
}
}
}

View File

@@ -1,9 +1,9 @@
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls}; use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
use meilisearch_types::network::Network;
use crate::error::FeatureNotEnabledError; use crate::error::FeatureNotEnabledError;
use crate::Result; use crate::Result;
@@ -38,10 +38,6 @@ impl RoFeatures {
Self { runtime } Self { runtime }
} }
pub fn from_runtime_features(features: RuntimeTogglableFeatures) -> Self {
Self { runtime: features }
}
pub fn runtime_features(&self) -> RuntimeTogglableFeatures { pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
self.runtime self.runtime
} }

View File

@@ -361,12 +361,6 @@ impl IndexMapper {
Ok(()) Ok(())
} }
/// The number of indexes in the database
#[cfg(feature = "enterprise")] // only used in enterprise edition for now
pub fn index_count(&self, rtxn: &RoTxn) -> Result<u64> {
Ok(self.index_mapping.len(rtxn)?)
}
/// Return an index, may open it if it wasn't already opened. /// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> { pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) = if let Some((current_name, current_index)) =

View File

@@ -6,7 +6,7 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn}; use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Kind, Status, Task}; use meilisearch_types::tasks::{Details, Kind, Status, Task};
use meilisearch_types::versioning::{self, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; use meilisearch_types::versioning;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::index_mapper::IndexMapper; use crate::index_mapper::IndexMapper;
@@ -27,7 +27,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
queue, queue,
scheduler, scheduler,
persisted, persisted,
export_default_payload_size_bytes: _,
index_mapper, index_mapper,
features: _, features: _,
@@ -37,7 +36,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
run_loop_iteration: _, run_loop_iteration: _,
embedders: _, embedders: _,
chat_settings: _, chat_settings: _,
runtime: _,
} = scheduler; } = scheduler;
let rtxn = env.read_txn().unwrap(); let rtxn = env.read_txn().unwrap();
@@ -233,7 +231,6 @@ pub fn snapshot_task(task: &Task) -> String {
status, status,
kind, kind,
network, network,
custom_metadata,
} = task; } = task;
snap.push('{'); snap.push('{');
snap.push_str(&format!("uid: {uid}, ")); snap.push_str(&format!("uid: {uid}, "));
@@ -254,9 +251,6 @@ pub fn snapshot_task(task: &Task) -> String {
if let Some(network) = network { if let Some(network) = network {
snap.push_str(&format!("network: {network:?}, ")) snap.push_str(&format!("network: {network:?}, "))
} }
if let Some(custom_metadata) = custom_metadata {
snap.push_str(&format!("custom_metadata: {custom_metadata:?}"))
}
snap.push('}'); snap.push('}');
snap snap
@@ -321,18 +315,11 @@ fn snapshot_details(d: &Details) -> String {
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}") format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
} }
Details::UpgradeDatabase { from, to } => { Details::UpgradeDatabase { from, to } => {
if to == &(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) {
format!("{{ from: {from:?}, to: [current version] }}")
} else {
format!("{{ from: {from:?}, to: {to:?} }}") format!("{{ from: {from:?}, to: {to:?} }}")
} }
}
Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => { Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}") format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
} }
Details::NetworkTopologyChange { moved_documents, message } => {
format!("{{ moved_documents: {moved_documents:?}, message: {message:?}")
}
} }
} }
@@ -408,21 +395,7 @@ pub fn snapshot_batch(batch: &Batch) -> String {
snap.push('{'); snap.push('{');
snap.push_str(&format!("uid: {uid}, ")); snap.push_str(&format!("uid: {uid}, "));
let details = if let Some(upgrade_to) = &details.upgrade_to { snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
if upgrade_to.as_str()
== format!("v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}").as_str()
{
let mut details = details.clone();
details.upgrade_to = Some("[current version]".into());
serde_json::to_string(&details).unwrap()
} else {
serde_json::to_string(details).unwrap()
}
} else {
serde_json::to_string(details).unwrap()
};
snap.push_str(&format!("details: {details}, "));
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap())); snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
if !embedder_stats.skip_serializing() { if !embedder_stats.skip_serializing() {
snap.push_str(&format!( snap.push_str(&format!(

View File

@@ -48,13 +48,13 @@ use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
use std::time::Duration; use std::time::Duration;
use byte_unit::Byte;
use dump::Dump; use dump::Dump;
pub use error::Error; pub use error::Error;
pub use features::RoFeatures; pub use features::RoFeatures;
use flate2::bufread::GzEncoder; use flate2::bufread::GzEncoder;
use flate2::Compression; use flate2::Compression;
use meilisearch_types::batches::Batch; use meilisearch_types::batches::Batch;
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::features::{ use meilisearch_types::features::{
ChatCompletionSettings, InstanceTogglableFeatures, RuntimeTogglableFeatures, ChatCompletionSettings, InstanceTogglableFeatures, RuntimeTogglableFeatures,
}; };
@@ -67,14 +67,11 @@ use meilisearch_types::milli::vector::{
Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment, Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
}; };
use meilisearch_types::milli::{self, Index}; use meilisearch_types::milli::{self, Index};
use meilisearch_types::network::Network;
use meilisearch_types::task_view::TaskView; use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::network::{ use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork};
DbTaskNetwork, ImportData, ImportMetadata, Origin, TaskNetwork,
};
use meilisearch_types::tasks::{KindWithContent, Task};
use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView}; use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
use milli::vector::db::IndexEmbeddingConfig; use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks;
pub use queue::Query; pub use queue::Query;
use queue::Queue; use queue::Queue;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@@ -85,7 +82,6 @@ use uuid::Uuid;
use versioning::Versioning; use versioning::Versioning;
use crate::index_mapper::IndexMapper; use crate::index_mapper::IndexMapper;
use crate::processing::ProcessingTasks;
use crate::utils::clamp_to_page_size; use crate::utils::clamp_to_page_size;
pub(crate) type BEI128 = I128<BE>; pub(crate) type BEI128 = I128<BE>;
@@ -148,11 +144,9 @@ pub struct IndexSchedulerOptions {
/// If the autobatcher is allowed to automatically batch tasks /// If the autobatcher is allowed to automatically batch tasks
/// it will only batch this defined maximum size (in bytes) of tasks at once. /// it will only batch this defined maximum size (in bytes) of tasks at once.
pub batched_tasks_size_limit: u64, pub batched_tasks_size_limit: u64,
/// The maximum size of the default payload for exporting documents, in bytes
pub export_default_payload_size_bytes: Byte,
/// The experimental features enabled for this instance. /// The experimental features enabled for this instance.
pub instance_features: InstanceTogglableFeatures, pub instance_features: InstanceTogglableFeatures,
/// Whether the index scheduler is able to auto upgrade or not. /// The experimental features enabled for this instance.
pub auto_upgrade: bool, pub auto_upgrade: bool,
/// The maximal number of entries in the search query cache of an embedder. /// The maximal number of entries in the search query cache of an embedder.
/// ///
@@ -205,9 +199,6 @@ pub struct IndexScheduler {
/// to the same embeddings for the same input text. /// to the same embeddings for the same input text.
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>, embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
/// The maximum size of the default payload for exporting documents, in bytes
pub export_default_payload_size_bytes: Byte,
// ================= test // ================= test
// The next entry is dedicated to the tests. // The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple part of the scheduler. /// Provide a way to set a breakpoint in multiple part of the scheduler.
@@ -225,9 +216,6 @@ pub struct IndexScheduler {
/// A counter that is incremented before every call to [`tick`](IndexScheduler::tick) /// A counter that is incremented before every call to [`tick`](IndexScheduler::tick)
#[cfg(test)] #[cfg(test)]
run_loop_iteration: Arc<RwLock<usize>>, run_loop_iteration: Arc<RwLock<usize>>,
/// The tokio runtime used for asynchronous tasks.
runtime: Option<tokio::runtime::Handle>,
} }
impl IndexScheduler { impl IndexScheduler {
@@ -243,7 +231,6 @@ impl IndexScheduler {
cleanup_enabled: self.cleanup_enabled, cleanup_enabled: self.cleanup_enabled,
experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps, experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
persisted: self.persisted, persisted: self.persisted,
export_default_payload_size_bytes: self.export_default_payload_size_bytes,
webhooks: self.webhooks.clone(), webhooks: self.webhooks.clone(),
embedders: self.embedders.clone(), embedders: self.embedders.clone(),
@@ -255,7 +242,6 @@ impl IndexScheduler {
run_loop_iteration: self.run_loop_iteration.clone(), run_loop_iteration: self.run_loop_iteration.clone(),
features: self.features.clone(), features: self.features.clone(),
chat_settings: self.chat_settings, chat_settings: self.chat_settings,
runtime: self.runtime.clone(),
} }
} }
@@ -269,23 +255,13 @@ impl IndexScheduler {
} }
/// Create an index scheduler and start its run loop. /// Create an index scheduler and start its run loop.
#[allow(private_interfaces)] // because test_utils is private
pub fn new( pub fn new(
options: IndexSchedulerOptions, options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>, auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32), from_db_version: (u32, u32, u32),
runtime: Option<tokio::runtime::Handle>, #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
) -> Result<Self> { #[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
let this = Self::new_without_run(options, auth_env, from_db_version, runtime)?;
this.run();
Ok(this)
}
fn new_without_run(
options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32),
runtime: Option<tokio::runtime::Handle>,
) -> Result<Self> { ) -> Result<Self> {
std::fs::create_dir_all(&options.tasks_path)?; std::fs::create_dir_all(&options.tasks_path)?;
std::fs::create_dir_all(&options.update_file_path)?; std::fs::create_dir_all(&options.update_file_path)?;
@@ -340,7 +316,8 @@ impl IndexScheduler {
wtxn.commit()?; wtxn.commit()?;
Ok(Self { // allow unreachable_code to get rid of the warning in the case of a test build.
let this = Self {
processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())), processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
version, version,
queue, queue,
@@ -355,34 +332,16 @@ impl IndexScheduler {
persisted, persisted,
webhooks: Arc::new(webhooks), webhooks: Arc::new(webhooks),
embedders: Default::default(), embedders: Default::default(),
export_default_payload_size_bytes: options.export_default_payload_size_bytes,
#[cfg(test)] // Will be replaced in `new_tests` in test environments #[cfg(test)]
test_breakpoint_sdr: crossbeam_channel::bounded(0).0, test_breakpoint_sdr,
#[cfg(test)] // Will be replaced in `new_tests` in test environments #[cfg(test)]
planned_failures: Default::default(), planned_failures,
#[cfg(test)] #[cfg(test)]
run_loop_iteration: Arc::new(RwLock::new(0)), run_loop_iteration: Arc::new(RwLock::new(0)),
features, features,
chat_settings, chat_settings,
runtime, };
})
}
/// Create an index scheduler and start its run loop.
#[cfg(test)]
fn new_test(
options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32),
runtime: Option<tokio::runtime::Handle>,
test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
planned_failures: Vec<(usize, test_utils::FailureLocation)>,
) -> Result<Self> {
let mut this = Self::new_without_run(options, auth_env, from_db_version, runtime)?;
this.test_breakpoint_sdr = test_breakpoint_sdr;
this.planned_failures = planned_failures;
this.run(); this.run();
Ok(this) Ok(this)
@@ -711,14 +670,14 @@ impl IndexScheduler {
self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing) self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing)
} }
pub fn set_task_network(&self, task_id: TaskId, network: DbTaskNetwork) -> Result<Task> { pub fn set_task_network(&self, task_id: TaskId, network: TaskNetwork) -> Result<()> {
let mut wtxn = self.env.write_txn()?; let mut wtxn = self.env.write_txn()?;
let mut task = let mut task =
self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?; self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?;
task.network = Some(network); task.network = Some(network);
self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?; self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?;
wtxn.commit()?; wtxn.commit()?;
Ok(task) Ok(())
} }
/// Return the batches matching the query from the user's point of view along /// Return the batches matching the query from the user's point of view along
@@ -767,31 +726,6 @@ impl IndexScheduler {
kind: KindWithContent, kind: KindWithContent,
task_id: Option<TaskId>, task_id: Option<TaskId>,
dry_run: bool, dry_run: bool,
) -> Result<Task> {
self.register_with_custom_metadata(kind, task_id, None, dry_run, None)
}
/// Register a new task in the scheduler, with metadata.
///
/// If it fails and data was associated with the task, it tries to delete the associated data.
///
/// # Parameters
///
/// - task_network: network of the task to check.
///
/// If the task is an import task, only accept it if:
///
/// 1. There is an ongoing network topology change task
/// 2. The task to register matches the network version of the network topology change task
///
/// Always accept the task if it is not an import task.
pub fn register_with_custom_metadata(
&self,
kind: KindWithContent,
task_id: Option<TaskId>,
custom_metadata: Option<String>,
dry_run: bool,
task_network: Option<TaskNetwork>,
) -> Result<Task> { ) -> Result<Task> {
// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task // if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty()) if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
@@ -802,19 +736,7 @@ impl IndexScheduler {
} }
let mut wtxn = self.env.write_txn()?; let mut wtxn = self.env.write_txn()?;
let task = self.queue.register(&mut wtxn, &kind, task_id, dry_run)?;
if let Some(TaskNetwork::Import { import_from, network_change, metadata }) = &task_network {
self.update_network_task(&mut wtxn, import_from, network_change, metadata)?;
}
let task = self.queue.register(
&mut wtxn,
&kind,
task_id,
custom_metadata,
dry_run,
task_network.map(DbTaskNetwork::from),
)?;
// If the registered task is a task cancelation // If the registered task is a task cancelation
// we inform the processing tasks to stop (if necessary). // we inform the processing tasks to stop (if necessary).
@@ -836,91 +758,6 @@ impl IndexScheduler {
Ok(task) Ok(task)
} }
pub fn network_no_index_for_remote(
&self,
remote_name: String,
origin: Origin,
) -> Result<(), Error> {
let mut wtxn = self.env.write_txn()?;
self.update_network_task(
&mut wtxn,
&ImportData { remote_name, index_name: None, document_count: 0 },
&origin,
&ImportMetadata { index_count: 0, task_key: None, total_index_documents: 0 },
)?;
wtxn.commit()?;
// wake up the scheduler as the task state has changed
self.scheduler.wake_up.signal();
Ok(())
}
fn update_network_task(
&self,
wtxn: &mut heed::RwTxn<'_>,
import_from: &ImportData,
network_change: &Origin,
metadata: &ImportMetadata,
) -> Result<(), Error> {
let mut network_tasks = self
.queue
.tasks
.get_kind(&*wtxn, meilisearch_types::tasks::Kind::NetworkTopologyChange)?;
if network_tasks.is_empty() {
return Err(Error::ImportTaskWithoutNetworkTask);
}
let network_task = {
let processing = self.processing_tasks.read().unwrap().processing.clone();
if processing.is_disjoint(&network_tasks) {
let enqueued = self
.queue
.tasks
.get_status(&*wtxn, meilisearch_types::tasks::Status::Enqueued)?;
network_tasks &= enqueued;
if let Some(network_task) = network_tasks.into_iter().next() {
network_task
} else {
return Err(Error::ImportTaskWithoutNetworkTask);
}
} else {
network_tasks &= &*processing;
network_tasks.into_iter().next().unwrap()
}
};
let mut network_task = self.queue.tasks.get_task(&*wtxn, network_task)?.unwrap();
let network_task_version = network_task
.network
.as_ref()
.map(|network| network.network_version())
.unwrap_or_default();
if network_task_version != network_change.network_version {
return Err(Error::NetworkVersionMismatch {
network_task: network_task_version,
import_task: network_change.network_version,
});
}
let KindWithContent::NetworkTopologyChange(network_topology_change) =
&mut network_task.kind
else {
tracing::error!("unexpected network kind for network task while registering task");
return Err(Error::CorruptedTaskQueue);
};
network_topology_change.receive_remote_task(
&import_from.remote_name,
import_from.index_name.as_deref(),
metadata.task_key,
import_from.document_count,
metadata.index_count,
metadata.total_index_documents,
)?;
self.queue.tasks.update_task(wtxn, &mut network_task)?;
Ok(())
}
/// Register a new task coming from a dump in the scheduler. /// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(&mut self) -> Result<Dump<'_>> { pub fn register_dumped_task(&mut self) -> Result<Dump<'_>> {
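Not the scheduler's real API: a hypothetical sketch of the acceptance rule that `update_network_task` above enforces — an import task is accepted only while a network topology change task is enqueued or processing, and only if its network version matches that task's version.

```rust
use uuid::Uuid;

enum AcceptError {
    ImportTaskWithoutNetworkTask,
    NetworkVersionMismatch { network_task: Uuid, import_task: Uuid },
}

/// `ongoing` is the network version of the in-flight topology change task, if any (hypothetical shape).
fn accept_import(ongoing: Option<Uuid>, import_version: Uuid) -> Result<(), AcceptError> {
    match ongoing {
        // No topology change in flight: imports are rejected outright.
        None => Err(AcceptError::ImportTaskWithoutNetworkTask),
        // A topology change exists but belongs to a different network version.
        Some(network_task) if network_task != import_version => {
            Err(AcceptError::NetworkVersionMismatch { network_task, import_task: import_version })
        }
        // Versions line up: the import may be registered.
        Some(_) => Ok(()),
    }
}
```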

View File

@@ -42,10 +42,12 @@ impl ProcessingTasks {
/// Set the processing tasks to an empty list /// Set the processing tasks to an empty list
pub fn stop_processing(&mut self) -> Self { pub fn stop_processing(&mut self) -> Self {
self.progress = None;
Self { Self {
batch: std::mem::take(&mut self.batch), batch: std::mem::take(&mut self.batch),
processing: std::mem::take(&mut self.processing), processing: std::mem::take(&mut self.processing),
progress: std::mem::take(&mut self.progress), progress: None,
} }
} }
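Not part of the diff: a tiny illustration of the `std::mem::take` pattern this hunk leans on — taking the value moves it into the returned snapshot and leaves the default (`None`) behind, which is why the explicit `self.progress = None;` assignment is no longer needed.

```rust
use std::mem;

fn take_progress(slot: &mut Option<String>) -> Option<String> {
    mem::take(slot) // returns the previous value, resets `slot` to `None`
}

fn main() {
    let mut progress = Some("indexing".to_string());
    let snapshot = take_progress(&mut progress);
    assert_eq!(snapshot.as_deref(), Some("indexing"));
    assert_eq!(progress, None);
}
```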

View File

@@ -1,17 +1,14 @@
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
use crate::test_utils::Breakpoint::*;
use crate::test_utils::{
index_creation_task, replace_document_import_task, replace_document_import_task_with_opts,
sample_documents, FailureLocation,
};
use crate::{IndexScheduler, Query};
use meili_snap::snapshot; use meili_snap::snapshot;
use meilisearch_auth::AuthFilter; use meilisearch_auth::AuthFilter;
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::update::MissingDocumentPolicy;
use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status}; use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status};
use time::{Duration, OffsetDateTime}; use time::{Duration, OffsetDateTime};
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
use crate::test_utils::Breakpoint::*;
use crate::test_utils::{index_creation_task, FailureLocation};
use crate::{IndexScheduler, Query};
#[test] #[test]
fn query_batches_from_and_limit() { fn query_batches_from_and_limit() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
@@ -490,41 +487,3 @@ fn query_batches_canceled_by() {
// Return only 1 because the user is not authorized to see task 2 // Return only 1 because the user is not authorized to see task 2
snapshot!(snapshot_bitmap(&batches), @"[1,]"); snapshot!(snapshot_bitmap(&batches), @"[1,]");
} }
#[test]
fn batch_skip_creation_with_deletion() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let kind = index_creation_task("docs", "id");
let _task = index_scheduler.register(kind, None, false).unwrap();
handle.advance_one_successful_batch();
let (file0, documents_count0) = sample_documents(&index_scheduler, 1, 1);
let (file1, documents_count1) = sample_documents(&index_scheduler, 2, 1);
file0.persist().unwrap();
file1.persist().unwrap();
let kind = replace_document_import_task("docs", Some("id"), 1, documents_count0);
index_scheduler.register(kind, None, false).unwrap();
index_scheduler
.register(
KindWithContent::DocumentDeletion {
index_uid: "docs".to_string(),
documents_ids: vec!["1".to_string()],
},
None,
false,
)
.unwrap();
let kind = replace_document_import_task_with_opts(
"docs",
Some("id"),
2,
documents_count1,
MissingDocumentPolicy::Skip,
);
index_scheduler.register(kind, None, false).unwrap();
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler));
}

View File

@@ -15,7 +15,6 @@ use file_store::FileStore;
use meilisearch_types::batches::BatchId; use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::network::DbTaskNetwork;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use time::format_description::well_known::Rfc3339; use time::format_description::well_known::Rfc3339;
@@ -258,9 +257,7 @@ impl Queue {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
kind: &KindWithContent, kind: &KindWithContent,
task_id: Option<TaskId>, task_id: Option<TaskId>,
custom_metadata: Option<String>,
dry_run: bool, dry_run: bool,
network: Option<DbTaskNetwork>,
) -> Result<Task> { ) -> Result<Task> {
let next_task_id = self.tasks.next_task_id(wtxn)?; let next_task_id = self.tasks.next_task_id(wtxn)?;
@@ -282,8 +279,7 @@ impl Queue {
details: kind.default_details(), details: kind.default_details(),
status: Status::Enqueued, status: Status::Enqueued,
kind: kind.clone(), kind: kind.clone(),
network, network: None,
custom_metadata,
}; };
// For deletion and cancelation tasks, we want to make extra sure that they // For deletion and cancelation tasks, we want to make extra sure that they
// don't attempt to delete/cancel tasks that are newer than themselves. // don't attempt to delete/cancel tasks that are newer than themselves.
@@ -348,9 +344,7 @@ impl Queue {
tasks: to_delete, tasks: to_delete,
}, },
None, None,
None,
false, false,
None,
)?; )?;
Ok(()) Ok(())

View File

@@ -1,81 +0,0 @@
---
source: crates/index-scheduler/src/queue/batches_test.rs
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "docs", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "docs", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
2 {uid: 2, batch_uid: 1, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "docs", documents_ids: ["1"] }}
3 {uid: 3, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "docs", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true, on_missing_document: Skip }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,3,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,3,]
"documentDeletion" [2,]
"indexCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
docs [0,1,2,3,]
----------------------------------------------------------------------
### Index Mapper:
docs: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,2,3,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,2,3,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"docs":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
1 {uid: 1, details: {"receivedDocuments":2,"indexedDocuments":1,"providedIds":1,"deletedDocuments":1}, stats: {"totalNbTasks":3,"status":{"succeeded":3},"types":{"documentAdditionOrUpdate":2,"documentDeletion":1},"indexUids":{"docs":3}}, stop reason: "batched all enqueued tasks", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
1 [1,2,3,]
----------------------------------------------------------------------
### Batches Status:
succeeded [0,1,]
----------------------------------------------------------------------
### Batches Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [1,]
"indexCreation" [0,]
----------------------------------------------------------------------
### Batches Index Tasks:
docs [0,1,]
----------------------------------------------------------------------
### Batches Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [1,]
----------------------------------------------------------------------
### Batches Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Batches Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/queue/test.rs
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true, on_missing_document: Create }} 1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 50, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 50, allow_index_creation: true, on_missing_document: Create }} 2 {uid: 2, status: enqueued, details: { received_documents: 50, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 50, allow_index_creation: true }}
3 {uid: 3, status: enqueued, details: { received_documents: 5000, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 5000, allow_index_creation: true, on_missing_document: Create }} 3 {uid: 3, status: enqueued, details: { received_documents: 5000, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 5000, allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
enqueued [0,1,2,3,] enqueued [0,1,2,3,]

View File

@@ -3,8 +3,7 @@ use std::ops::{Bound, RangeBounds};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::network::DbTaskNetwork; use meilisearch_types::tasks::{Kind, Status, Task};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime; use time::OffsetDateTime;
@@ -115,15 +114,14 @@ impl TaskQueue {
/// - CorruptedTaskQueue: The task doesn't exist in the database /// - CorruptedTaskQueue: The task doesn't exist in the database
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &mut Task) -> Result<()> { pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &mut Task) -> Result<()> {
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?; let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
// network topology tasks may be processed multiple times. let reprocessing = old_task.status != Status::Enqueued;
let maybe_reprocessing = old_task.status != Status::Enqueued
|| task.kind.as_kind() == Kind::NetworkTopologyChange;
debug_assert!(old_task != *task);
debug_assert_eq!(old_task.uid, task.uid); debug_assert_eq!(old_task.uid, task.uid);
// If we're processing a task that failed it may already contains a batch_uid // If we're processing a task that failed it may already contains a batch_uid
debug_assert!( debug_assert!(
maybe_reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()), reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
"\n==> old: {old_task:?}\n==> new: {task:?}" "\n==> old: {old_task:?}\n==> new: {task:?}"
); );
@@ -145,24 +143,13 @@ impl TaskQueue {
})?; })?;
} }
// Avoids rewriting part of the network topology change because of TOCTOU errors
if let (
KindWithContent::NetworkTopologyChange(old_state),
KindWithContent::NetworkTopologyChange(new_state),
) = (old_task.kind, &mut task.kind)
{
new_state.merge(old_state);
// the state possibly just changed, rewrite the details
task.details = Some(new_state.to_details());
}
assert_eq!( assert_eq!(
old_task.enqueued_at, task.enqueued_at, old_task.enqueued_at, task.enqueued_at,
"Cannot update a task's enqueued_at time" "Cannot update a task's enqueued_at time"
); );
if old_task.started_at != task.started_at { if old_task.started_at != task.started_at {
assert!( assert!(
maybe_reprocessing || old_task.started_at.is_none(), reprocessing || old_task.started_at.is_none(),
"Cannot update a task's started_at time" "Cannot update a task's started_at time"
); );
if let Some(started_at) = old_task.started_at { if let Some(started_at) = old_task.started_at {
@@ -174,7 +161,7 @@ impl TaskQueue {
} }
if old_task.finished_at != task.finished_at { if old_task.finished_at != task.finished_at {
assert!( assert!(
maybe_reprocessing || old_task.finished_at.is_none(), reprocessing || old_task.finished_at.is_none(),
"Cannot update a task's finished_at time" "Cannot update a task's finished_at time"
); );
if let Some(finished_at) = old_task.finished_at { if let Some(finished_at) = old_task.finished_at {
@@ -188,16 +175,7 @@ impl TaskQueue {
task.network = match (old_task.network, task.network.take()) { task.network = match (old_task.network, task.network.take()) {
(None, None) => None, (None, None) => None,
(None, Some(network)) | (Some(network), None) => Some(network), (None, Some(network)) | (Some(network), None) => Some(network),
(Some(left), Some(right)) => Some(match (left, right) { (Some(_), Some(network)) => Some(network),
(
DbTaskNetwork::Remotes { remote_tasks: mut left, network_version: _ },
DbTaskNetwork::Remotes { remote_tasks: mut right, network_version },
) => {
left.append(&mut right);
DbTaskNetwork::Remotes { remote_tasks: left, network_version }
}
(_, right) => right,
}),
}; };
self.all_tasks.put(wtxn, &task.uid, task)?; self.all_tasks.put(wtxn, &task.uid, task)?;
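Not part of the diff: a hedged sketch of the merge rule above, assuming the per-remote task collection behaves like a `BTreeMap` (the real `DbTaskNetwork::Remotes` payload may differ). Stored entries are kept, entries from the newer update are appended on top, and colliding keys take the newer value, mirroring `left.append(&mut right)`.

```rust
use std::collections::BTreeMap;

fn merge_remote_tasks(
    mut stored: BTreeMap<String, u32>,
    mut incoming: BTreeMap<String, u32>,
) -> BTreeMap<String, u32> {
    // `append` drains `incoming`; on key collisions the incoming value replaces the stored one.
    stored.append(&mut incoming);
    stored
}
```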

View File

@@ -203,30 +203,26 @@ fn test_disable_auto_deletion_of_tasks() {
) )
.unwrap(); .unwrap();
{
let rtxn = index_scheduler.env.read_txn().unwrap(); let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap(); let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler let tasks =
.queue index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
} drop(rtxn);
drop(proc);
// now we're above the max number of tasks // now we're above the max number of tasks
// and if we try to advance in the tick function no new task deletion should be enqueued // and if we try to advance in the tick function no new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]); handle.advance_till([Start, BatchCreated]);
{
let rtxn = index_scheduler.env.read_txn().unwrap(); let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap(); let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler let tasks =
.queue index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
} drop(rtxn);
drop(proc);
} }
#[test] #[test]
@@ -271,69 +267,59 @@ fn test_auto_deletion_of_tasks() {
) )
.unwrap(); .unwrap();
{
let rtxn = index_scheduler.env.read_txn().unwrap(); let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap(); let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler let tasks =
.queue index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
} drop(rtxn);
drop(proc);
{
// now we're above the max number of tasks // now we're above the max number of tasks
// and if we try to advance in the tick function a new task deletion should be enqueued // and if we try to advance in the tick function a new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]); handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap(); let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap(); let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler let tasks =
.queue index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued"); snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
} drop(rtxn);
drop(proc);
{
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
let rtxn = index_scheduler.env.read_txn().unwrap(); let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap(); let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler let tasks =
.queue index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed"); snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
} drop(rtxn);
drop(proc);
handle.advance_one_failed_batch(); handle.advance_one_failed_batch();
// a new task deletion has been enqueued // a new task deletion has been enqueued
handle.advance_one_successful_batch(); handle.advance_one_successful_batch();
{
let rtxn = index_scheduler.env.read_txn().unwrap(); let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap(); let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler let tasks =
.queue index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion"); snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
} drop(rtxn);
drop(proc);
handle.advance_one_failed_batch(); handle.advance_one_failed_batch();
handle.advance_one_successful_batch(); handle.advance_one_successful_batch();
{
let rtxn = index_scheduler.env.read_txn().unwrap(); let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap(); let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks = index_scheduler let tasks =
.queue index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed"); snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
} drop(rtxn);
drop(proc);
} }
#[test] #[test]
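The reworked assertions above lean on block scoping: the read transaction and the processing-tasks guard are released at the closing brace of each `{ ... }` block instead of through explicit `drop(...)` calls. A tiny illustration of the same idea with a plain `RwLock` (std types only, purely illustrative):

use std::sync::RwLock;

fn scoped_guard_example(lock: &RwLock<Vec<u32>>) {
    {
        let guard = lock.read().unwrap();
        assert!(guard.is_empty());
    } // the read guard is released here...
    lock.write().unwrap().push(1); // ...so taking the write lock afterwards cannot block on it
}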

View File

@@ -74,7 +74,6 @@ impl From<KindWithContent> for AutobatchKind {
| KindWithContent::DumpCreation { .. } | KindWithContent::DumpCreation { .. }
| KindWithContent::Export { .. } | KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. } | KindWithContent::UpgradeDatabase { .. }
| KindWithContent::NetworkTopologyChange(_)
| KindWithContent::SnapshotCreation => { | KindWithContent::SnapshotCreation => {
panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.") panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.")
} }

View File

@@ -1,13 +1,13 @@
use self::autobatcher::{autobatch, BatchKind};
use super::*;
use crate::TaskId;
use meilisearch_types::milli::update::IndexDocumentsMethod::{ use meilisearch_types::milli::update::IndexDocumentsMethod::{
self, ReplaceDocuments, UpdateDocuments, self, ReplaceDocuments, UpdateDocuments,
}; };
use meilisearch_types::milli::update::MissingDocumentPolicy;
use meilisearch_types::tasks::{BatchStopReason, IndexSwap, KindWithContent}; use meilisearch_types::tasks::{BatchStopReason, IndexSwap, KindWithContent};
use uuid::Uuid; use uuid::Uuid;
use self::autobatcher::{autobatch, BatchKind};
use super::*;
use crate::TaskId;
#[macro_export] #[macro_export]
macro_rules! debug_snapshot { macro_rules! debug_snapshot {
($value:expr, @$snapshot:literal) => {{ ($value:expr, @$snapshot:literal) => {{
@@ -40,7 +40,6 @@ fn doc_imp(
content_file: Uuid::new_v4(), content_file: Uuid::new_v4(),
documents_count: 0, documents_count: 0,
allow_index_creation, allow_index_creation,
on_missing_document: MissingDocumentPolicy::default(),
} }
} }

View File

@@ -1,37 +0,0 @@
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::tasks::Task;
use super::create_batch::Batch;
use crate::scheduler::process_batch::ProcessBatchInfo;
use crate::utils::ProcessingBatch;
use crate::{Error, IndexScheduler, Result};
impl IndexScheduler {
pub(super) fn process_network_index_batch(
&self,
_network_task: Task,
_inner_batch: Box<Batch>,
_current_batch: &mut ProcessingBatch,
_progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
Err(Error::RequiresEnterpriseEdition { action: "processing a network task" })
}
pub(super) fn process_network_ready(
&self,
_task: Task,
_progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
Err(Error::RequiresEnterpriseEdition { action: "processing a network task" })
}
#[cfg(unix)]
pub(super) async fn process_snapshot_to_s3(
&self,
_progress: Progress,
_opts: meilisearch_types::milli::update::S3SnapshotOptions,
_tasks: Vec<Task>,
) -> Result<Vec<Task>> {
Err(Error::RequiresEnterpriseEdition { action: "processing an S3-streaming snapshot task" })
}
}
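These stubs are the community-edition half of a feature-gated module pair: the same entry points exist in both builds, but without the `enterprise` feature they only return `RequiresEnterpriseEdition`. A minimal sketch of the pattern (module and function names are illustrative, not the real crate layout):

#[cfg(not(feature = "enterprise"))]
mod edition {
    // Community build: the entry point exists so callers compile, but it refuses the work.
    pub fn process_network_ready() -> Result<(), String> {
        Err("this action requires the enterprise edition".to_string())
    }
}

#[cfg(feature = "enterprise")]
mod edition {
    // Enterprise build: the same path resolves to the full implementation.
    pub fn process_network_ready() -> Result<(), String> {
        Ok(())
    }
}

fn main() {
    // Call sites are identical in both builds.
    let _ = edition::process_network_ready();
}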

View File

@@ -2,9 +2,8 @@ use std::fmt;
use std::io::ErrorKind; use std::io::ErrorKind;
use meilisearch_types::heed::RoTxn; use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::{IndexDocumentsMethod, MissingDocumentPolicy}; use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::{Settings, Unchecked}; use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::network::NetworkTopologyState;
use meilisearch_types::tasks::{BatchStopReason, Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{BatchStopReason, Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use uuid::Uuid; use uuid::Uuid;
@@ -60,20 +59,12 @@ pub(crate) enum Batch {
index_uid: String, index_uid: String,
task: Task, task: Task,
}, },
#[allow(clippy::enum_variant_names)] // warranted because we are executing an inner index batch
NetworkIndexBatch {
network_task: Task,
inner_batch: Box<Batch>,
},
NetworkReady {
task: Task,
},
} }
#[derive(Debug)] #[derive(Debug)]
pub(crate) enum DocumentOperation { pub(crate) enum DocumentOperation {
Replace { content_file: Uuid, on_missing_document: MissingDocumentPolicy }, Replace(Uuid),
Update { content_file: Uuid, on_missing_document: MissingDocumentPolicy }, Update(Uuid),
Delete(Vec<String>), Delete(Vec<String>),
} }
@@ -149,14 +140,9 @@ impl Batch {
.. ..
} => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)), } => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
}, },
Batch::IndexSwap { task } | Batch::NetworkReady { task } => { Batch::IndexSwap { task } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
} }
Batch::NetworkIndexBatch { network_task, inner_batch } => {
let mut tasks = inner_batch.ids();
tasks.insert(network_task.uid);
tasks
}
} }
} }
@@ -170,14 +156,12 @@ impl Batch {
| Dump(_) | Dump(_)
| Export { .. } | Export { .. }
| UpgradeDatabase { .. } | UpgradeDatabase { .. }
| NetworkReady { .. }
| IndexSwap { .. } => None, | IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()), IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. } IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. } | IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } | IndexDeletion { index_uid, .. }
| IndexCompaction { index_uid, .. } => Some(index_uid), | IndexCompaction { index_uid, .. } => Some(index_uid),
NetworkIndexBatch { network_task: _, inner_batch } => inner_batch.index_uid(),
} }
} }
} }
@@ -200,8 +184,6 @@ impl fmt::Display for Batch {
Batch::IndexCompaction { .. } => f.write_str("IndexCompaction")?, Batch::IndexCompaction { .. } => f.write_str("IndexCompaction")?,
Batch::Export { .. } => f.write_str("Export")?, Batch::Export { .. } => f.write_str("Export")?,
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?, Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
Batch::NetworkIndexBatch { .. } => f.write_str("NetworkTopologyChange")?,
Batch::NetworkReady { .. } => f.write_str("NetworkTopologyChange")?,
}; };
match index_uid { match index_uid {
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")), Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
@@ -311,22 +293,13 @@ impl IndexScheduler {
for task in tasks.iter() { for task in tasks.iter() {
match task.kind { match task.kind {
KindWithContent::DocumentAdditionOrUpdate { KindWithContent::DocumentAdditionOrUpdate {
content_file, content_file, method, ..
method,
on_missing_document,
..
} => match method { } => match method {
IndexDocumentsMethod::ReplaceDocuments => { IndexDocumentsMethod::ReplaceDocuments => {
operations.push(DocumentOperation::Replace { operations.push(DocumentOperation::Replace(content_file))
content_file,
on_missing_document,
})
} }
IndexDocumentsMethod::UpdateDocuments => { IndexDocumentsMethod::UpdateDocuments => {
operations.push(DocumentOperation::Update { operations.push(DocumentOperation::Update(content_file))
content_file,
on_missing_document,
})
} }
_ => unreachable!("Unknown document merging method"), _ => unreachable!("Unknown document merging method"),
}, },
@@ -479,7 +452,6 @@ impl IndexScheduler {
pub(crate) fn create_next_batch( pub(crate) fn create_next_batch(
&self, &self,
rtxn: &RoTxn, rtxn: &RoTxn,
processing_network_tasks: &RoaringBitmap,
) -> Result<Option<(Batch, ProcessingBatch)>> { ) -> Result<Option<(Batch, ProcessingBatch)>> {
#[cfg(test)] #[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?; self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;
@@ -488,6 +460,7 @@ impl IndexScheduler {
let mut current_batch = ProcessingBatch::new(batch_id); let mut current_batch = ProcessingBatch::new(batch_id);
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?; let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
let count_total_enqueued = enqueued.len();
let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?; let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?;
// 0. we get the last task to cancel. // 0. we get the last task to cancel.
@@ -536,15 +509,7 @@ impl IndexScheduler {
))); )));
} }
// 2. Check for enqueued network topology changes // 2. we get the next task to delete
let network_changes = self.queue.tasks.get_kind(rtxn, Kind::NetworkTopologyChange)?
& (enqueued | processing_network_tasks);
if let Some(task_id) = network_changes.iter().next() {
let task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
return self.start_processing_network(rtxn, task, enqueued, current_batch);
}
// 3. we get the next task to delete
let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & enqueued; let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
if !to_delete.is_empty() { if !to_delete.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?; let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?;
@@ -554,7 +519,7 @@ impl IndexScheduler {
return Ok(Some((Batch::TaskDeletions(tasks), current_batch))); return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
} }
// 4. we get the next task to compact // 3. we get the next task to compact
let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued; let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued;
if let Some(task_id) = to_compact.min() { if let Some(task_id) = to_compact.min() {
let mut task = let mut task =
@@ -569,7 +534,7 @@ impl IndexScheduler {
return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch))); return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch)));
} }
// 5. we batch the export. // 4. we batch the export.
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued; let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
if !to_export.is_empty() { if !to_export.is_empty() {
let task_id = to_export.iter().next().expect("There must be at least one export task"); let task_id = to_export.iter().next().expect("There must be at least one export task");
@@ -580,7 +545,7 @@ impl IndexScheduler {
return Ok(Some((Batch::Export { task }, current_batch))); return Ok(Some((Batch::Export { task }, current_batch)));
} }
// 6. we batch the snapshot. // 5. we batch the snapshot.
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued; let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
if !to_snapshot.is_empty() { if !to_snapshot.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?; let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
@@ -590,7 +555,7 @@ impl IndexScheduler {
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch))); return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
} }
// 7. we batch the dumps. // 6. we batch the dumps.
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued; let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
if let Some(to_dump) = to_dump.min() { if let Some(to_dump) = to_dump.min() {
let mut task = let mut task =
@@ -603,66 +568,25 @@ impl IndexScheduler {
return Ok(Some((Batch::Dump(task), current_batch))); return Ok(Some((Batch::Dump(task), current_batch)));
} }
let network = self.network(); // 7. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
// 8. We make a batch from the unprioritised tasks.
let (batch, current_batch) =
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
// We want to execute all tasks, except those that have a version strictly higher than the network version
let Some(task_version) =
task.network.as_ref().map(|tastk_network| tastk_network.network_version())
else {
// do not skip tasks that have no network version, otherwise we will never execute them
return false;
};
// skip tasks with a version strictly higher than the network version
task_version > network.version
})?;
Ok(batch.map(|batch| (batch, current_batch)))
}
fn create_next_batch_unprioritized<F>(
&self,
rtxn: &RoTxn,
enqueued: &RoaringBitmap,
mut current_batch: ProcessingBatch,
mut skip_if: F,
) -> Result<(Option<Batch>, ProcessingBatch)>
where
F: FnMut(&Task) -> bool,
{
let count_total_enqueued = enqueued.len();
let mut enqueued_it = enqueued.iter();
let mut task;
let index_name = loop {
let Some(task_id) = enqueued_it.next() else {
return Ok((None, current_batch));
};
task = self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
if skip_if(&task) {
continue;
}
// If the task is not associated with any index, verify that it is an index swap and // If the task is not associated with any index, verify that it is an index swap and
// create the batch directly. Otherwise, get the index name associated with the task // create the batch directly. Otherwise, get the index name associated with the task
// and use the autobatcher to batch the enqueued tasks associated with it // and use the autobatcher to batch the enqueued tasks associated with it
if let Some(&index_name) = task.indexes().first() { let index_name = if let Some(&index_name) = task.indexes().first() {
break index_name; index_name
} else { } else {
assert!( assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())
);
current_batch.processing(Some(&mut task)); current_batch.processing(Some(&mut task));
current_batch.reason(BatchStopReason::TaskCannotBeBatched { current_batch.reason(BatchStopReason::TaskCannotBeBatched {
kind: Kind::IndexSwap, kind: Kind::IndexSwap,
id: task.uid, id: task.uid,
}); });
return Ok((Some(Batch::IndexSwap { task }), current_batch)); return Ok(Some((Batch::IndexSwap { task }, current_batch)));
};
}; };
let index_already_exists = self.index_mapper.exists(rtxn, index_name)?; let index_already_exists = self.index_mapper.exists(rtxn, index_name)?;
@@ -697,10 +621,6 @@ impl IndexScheduler {
.get_task(rtxn, task_id) .get_task(rtxn, task_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?; .and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
if skip_if(&task) {
continue;
}
if let Some(uuid) = task.content_uuid() { if let Some(uuid) = task.content_uuid() {
let content_size = match self.queue.file_store.compute_size(uuid) { let content_size = match self.queue.file_store.compute_size(uuid) {
Ok(content_size) => content_size, Ok(content_size) => content_size,
@@ -731,127 +651,19 @@ impl IndexScheduler {
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref()) autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
{ {
current_batch.reason(autobatch_stop_reason.unwrap_or(stop_reason)); current_batch.reason(autobatch_stop_reason.unwrap_or(stop_reason));
let batch = self.create_next_batch_index( return Ok(self
.create_next_batch_index(
rtxn, rtxn,
index_name.to_string(), index_name.to_string(),
batchkind, batchkind,
&mut current_batch, &mut current_batch,
create_index, create_index,
)?; )?
return Ok((batch, current_batch)); .map(|batch| (batch, current_batch)));
} }
// If we found no tasks then we were notified for something that got autobatched // If we found no tasks then we were notified for something that got autobatched
// somehow and there is nothing to do. // somehow and there is nothing to do.
Ok((None, current_batch)) Ok(None)
}
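The `skip_if` callback above filters the enqueued tasks by network version: tasks without a version are always eligible, and tasks strictly newer than the version currently in effect are left for later. A simplified sketch of that predicate (types reduced to the bare minimum):

#[derive(Clone, Copy, PartialEq, PartialOrd)]
struct NetworkVersion(u64);

struct TaskLite {
    network_version: Option<NetworkVersion>,
}

fn skip_newer_than(current: NetworkVersion) -> impl FnMut(&TaskLite) -> bool {
    move |task| match task.network_version {
        // Tasks without a version must still run, so they are never skipped.
        None => false,
        // Anything strictly newer than the current network version waits.
        Some(version) => version > current,
    }
}

The import phase below uses the same mechanism with a different condition: it skips anything whose version differs from the topology change's version.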
fn start_processing_network(
&self,
rtxn: &RoTxn,
mut task: Task,
enqueued: &RoaringBitmap,
mut current_batch: ProcessingBatch,
) -> Result<Option<(Batch, ProcessingBatch)>> {
current_batch.processing(Some(&mut task));
current_batch.reason(BatchStopReason::NetworkTask { id: task.uid });
let change_version =
task.network.as_ref().map(|network| network.network_version()).unwrap_or_default();
let KindWithContent::NetworkTopologyChange(network_topology_change) = &task.kind else {
panic!("inconsistent kind with content")
};
match network_topology_change.state() {
NetworkTopologyState::WaitingForOlderTasks => {
let res =
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
// in this limited mode of execution, we only want to run tasks:
// 0. with an index
// 1. with a version
// 2. that version strictly lower than the network task version
// 0. skip indexless tasks that are not index swap
if task.index_uid().is_none() && task.kind.as_kind() != Kind::IndexSwap {
return true;
}
// 1. skip tasks without version
let Some(task_version) =
task.network.as_ref().map(|network| network.network_version())
else {
return true;
};
// 2. skip tasks with a version equal or higher to the network task version
task_version >= change_version
});
let (batch, mut current_batch) = res?;
let batch = match batch {
Some(batch) => {
let inner_batch = Box::new(batch);
let inner_reason = current_batch.reason.to_string();
current_batch.reason(BatchStopReason::NetworkTaskOlderTasks {
id: task.uid,
inner_reason,
});
Batch::NetworkIndexBatch { network_task: task, inner_batch }
}
None => Batch::NetworkReady { task },
};
Ok(Some((batch, current_batch)))
}
NetworkTopologyState::ImportingDocuments => {
// if the import is done we need to go to the next state
if network_topology_change.is_import_finished() {
return Ok(Some((Batch::NetworkReady { task }, current_batch)));
}
let res =
self.create_next_batch_unprioritized(rtxn, enqueued, current_batch, |task| {
// in this limited mode of execution, we only want to run tasks:
// 0. with an index
// 1. with a version
// 2. that version equal to the network task version
// 0. skip indexless tasks
if task.index_uid().is_none() && task.kind.as_kind() != Kind::IndexSwap {
return true;
}
// 1. skip tasks without version
let Some(task_version) =
task.network.as_ref().map(|network| network.network_version())
else {
return true;
};
// 2. skip tasks with a version different from the network task version
task_version != change_version
});
let (batch, mut current_batch) = res?;
let batch = batch.map(|batch| {
let inner_batch = Box::new(batch);
let inner_reason = current_batch.reason.to_string();
current_batch.reason(BatchStopReason::NetworkTaskImportTasks {
id: task.uid,
inner_reason,
});
(Batch::NetworkIndexBatch { network_task: task, inner_batch }, current_batch)
});
Ok(batch)
}
NetworkTopologyState::ExportingDocuments | NetworkTopologyState::Finished => {
Ok(Some((Batch::NetworkReady { task }, current_batch)))
}
}
} }
} }
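The network-topology handling shown above is essentially a small state machine: while waiting for older tasks it batches them until none remain, during the import phase it batches tasks carrying the new version until the import completes, and afterwards it emits the `NetworkReady` batch. A rough sketch of that decision (enum and inputs simplified; the real code derives them from the task and the queue):

enum NetworkTopologyState { WaitingForOlderTasks, ImportingDocuments, ExportingDocuments, Finished }
enum NextBatch { OlderTasks, ImportTasks, Ready }

fn next_network_batch(
    state: &NetworkTopologyState,
    import_finished: bool,
    has_candidates: bool,
) -> Option<NextBatch> {
    match state {
        // Drain tasks that predate the topology change; once none are left, move on.
        NetworkTopologyState::WaitingForOlderTasks if has_candidates => Some(NextBatch::OlderTasks),
        NetworkTopologyState::WaitingForOlderTasks => Some(NextBatch::Ready),
        // Import phase: finish as soon as the import is done, otherwise keep batching
        // tasks tagged with the new version, or wait if none are available yet.
        NetworkTopologyState::ImportingDocuments if import_finished => Some(NextBatch::Ready),
        NetworkTopologyState::ImportingDocuments if has_candidates => Some(NextBatch::ImportTasks),
        NetworkTopologyState::ImportingDocuments => None,
        // Exporting and Finished both resolve to the ready batch.
        NetworkTopologyState::ExportingDocuments | NetworkTopologyState::Finished => Some(NextBatch::Ready),
    }
}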

View File

@@ -1,845 +0,0 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::time::Duration;
use bumpalo::Bump;
use roaring::RoaringBitmap;
use meilisearch_types::milli::documents::PrimaryKey;
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer;
use meilisearch_types::milli::update::new::indexer::current_edition::sharding::Shards;
use meilisearch_types::milli::{self};
use meilisearch_types::network::Remote;
use meilisearch_types::tasks::network::{NetworkTopologyState, Origin};
use meilisearch_types::tasks::{KindWithContent, Status, Task};
use super::create_batch::Batch;
use crate::scheduler::process_batch::ProcessBatchInfo;
use crate::scheduler::process_export::{ExportContext, ExportOptions, TargetInstance};
use crate::utils::ProcessingBatch;
use crate::{Error, IndexScheduler, Result};
impl IndexScheduler {
pub(super) fn process_network_index_batch(
&self,
mut network_task: Task,
inner_batch: Box<Batch>,
current_batch: &mut ProcessingBatch,
progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
let KindWithContent::NetworkTopologyChange(network_topology_change) =
&mut network_task.kind
else {
tracing::error!("unexpected network kind for network task while processing batch");
return Err(Error::CorruptedTaskQueue);
};
let network = network_topology_change.network_for_state();
let (mut tasks, info) =
self.process_batch(*inner_batch, current_batch, progress, network)?;
for task in &tasks {
let Some(network) = task.network.as_ref() else {
continue;
};
let Some(import) = network.import_data() else {
continue;
};
if let Some(index_name) = import.index_name.as_deref() {
network_topology_change.process_remote_tasks(
&import.remote_name,
index_name,
import.document_count,
);
}
}
network_task.details = Some(network_topology_change.to_details());
tasks.push(network_task);
Ok((tasks, info))
}
pub(super) fn process_network_ready(
&self,
mut task: Task,
progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
let KindWithContent::NetworkTopologyChange(network_topology_change) = &mut task.kind else {
tracing::error!("network topology change task has the wrong kind with content");
return Err(Error::CorruptedTaskQueue);
};
let Some(task_network) = &task.network else {
tracing::error!("network topology change task has no network");
return Err(Error::CorruptedTaskQueue);
};
let origin;
let origin = match task_network.origin() {
Some(origin) => origin,
None => {
let myself = network_topology_change.in_name().expect("origin is not the leader");
origin = Origin {
remote_name: myself.to_string(),
task_uid: task.uid,
network_version: task_network.network_version(),
};
&origin
}
};
let mut moved_documents = None;
if let (Some((remotes, out_name)), Some(new_shards)) =
(network_topology_change.export_to_process(), network_topology_change.new_shards())
{
moved_documents = Some(self.balance_documents(
remotes,
out_name,
new_shards,
origin,
&progress,
&self.scheduler.must_stop_processing,
)?);
}
if let Some(moved_documents) = moved_documents {
// we need the mutable `moved_documents` binding to avoid a lifetime error in the previous `if let`.
network_topology_change.set_moved(moved_documents);
}
network_topology_change.update_state();
if network_topology_change.state() == NetworkTopologyState::Finished {
task.status = Status::Succeeded;
}
task.details = Some(network_topology_change.to_details());
Ok((vec![task], Default::default()))
}
fn balance_documents<'a, I: Iterator<Item = (&'a str, &'a Remote)> + Clone>(
&self,
remotes: I,
out_name: &str,
new_shards: Shards,
network_change_origin: &Origin,
progress: &Progress,
must_stop_processing: &crate::scheduler::MustStopProcessing,
) -> crate::Result<u64> {
// TECHDEBT: this spawns a `ureq` agent in addition to `reqwest`. We probably want to harmonize all of this.
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
let mut indexer_alloc = Bump::new();
let scheduler_rtxn = self.env.read_txn()?;
let index_count = self.index_mapper.index_count(&scheduler_rtxn)?;
// when the instance is empty, we still need to tell the remotes, as they cannot know that
// on their own and would otherwise keep waiting for data
if index_count == 0 {
for (remote_name, remote) in remotes {
let target = TargetInstance {
remote_name: Some(remote_name),
base_url: &remote.url,
api_key: remote.write_api_key.as_deref(),
};
let res = self.export_no_index(
target,
out_name,
network_change_origin,
&agent,
must_stop_processing,
);
if let Err(err) = res {
tracing::warn!("Could not signal `{remote_name}` not to wait for documents due to error: {err}");
}
}
return Ok(0);
}
let mut total_moved_documents = 0;
self.index_mapper.try_for_each_index::<(), ()>(
&scheduler_rtxn,
|index_uid, index| -> crate::Result<()> {
indexer_alloc.reset();
let err = |err| Error::from_milli(err, Some(index_uid.to_string()));
let index_rtxn = index.read_txn()?;
let all_docids = index.external_documents_ids();
let mut documents_to_move_to =
hashbrown::HashMap::<String, RoaringBitmap>::new();
let mut documents_to_delete = RoaringBitmap::new();
for res in all_docids.iter(&index_rtxn)? {
let (external_docid, docid) = res?;
match new_shards.processing_shard(external_docid) {
Some(shard) if shard.is_own => continue,
Some(shard) => {
documents_to_move_to.entry_ref(&shard.name).or_default().insert(docid);
}
None => {
documents_to_delete.insert(docid);
}
}
}
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
for (remote_name, remote) in remotes.clone() {
let documents_to_move =
documents_to_move_to.remove(remote_name).unwrap_or_default();
let target = TargetInstance {
remote_name: Some(remote_name),
base_url: &remote.url,
api_key: remote.write_api_key.as_deref(),
};
let options = ExportOptions {
index_uid,
payload_size: None,
override_settings: false,
export_mode: super::process_export::ExportMode::NetworkBalancing {
index_count,
export_old_remote_name: out_name,
network_change_origin,
},
};
let ctx = ExportContext {
index,
index_rtxn: &index_rtxn,
universe: &documents_to_move,
progress,
agent: &agent,
must_stop_processing,
};
let res = self.export_one_index(target, options, ctx);
match res {
Ok(_) => { documents_to_delete |= documents_to_move; }
Err(err) => {
tracing::warn!("Could not export documents to `{remote_name}` due to error: {err}\n - Note: Documents will be kept");
}
}
}
if documents_to_delete.is_empty() {
return Ok(());
}
total_moved_documents += documents_to_delete.len();
self.delete_documents_from_index(progress, must_stop_processing, &indexer_alloc, index_uid, index, &err, index_rtxn, documents_to_delete, fields_ids_map)
},
)?;
Ok(total_moved_documents)
}
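`balance_documents` routes every document through the new shard layout: documents the instance still owns stay put, documents owned by another shard are queued for export to that remote, and documents no shard claims are deleted. A condensed sketch of that routing step, with a hypothetical `route` callback standing in for `Shards::processing_shard`:

use std::collections::HashMap;

enum Route<'a> {
    Own,
    Other(&'a str), // name of the remote that now owns the document
    Nowhere,
}

fn plan_moves<'a>(
    docs: &[(String, u32)], // (external document id, internal docid)
    route: impl Fn(&str) -> Route<'a>,
) -> (HashMap<&'a str, Vec<u32>>, Vec<u32>) {
    let mut to_move: HashMap<&'a str, Vec<u32>> = HashMap::new();
    let mut to_delete = Vec::new();
    for (external_id, docid) in docs {
        match route(external_id.as_str()) {
            Route::Own => continue, // stays on this instance
            Route::Other(remote) => to_move.entry(remote).or_default().push(*docid),
            Route::Nowhere => to_delete.push(*docid),
        }
    }
    (to_move, to_delete)
}

In the function above, documents that were exported successfully are also added to the deletion set, so a failed export keeps its documents on the local instance.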
#[allow(clippy::too_many_arguments)]
fn delete_documents_from_index(
&self,
progress: &Progress,
must_stop_processing: &super::MustStopProcessing,
indexer_alloc: &Bump,
index_uid: &str,
index: &milli::Index,
err: &impl Fn(milli::Error) -> Error,
index_rtxn: milli::heed::RoTxn<'_, milli::heed::WithoutTls>,
documents_to_delete: RoaringBitmap,
fields_ids_map: milli::FieldsIdsMap,
) -> std::result::Result<(), Error> {
let mut new_fields_ids_map = fields_ids_map.clone();
// candidates not empty => index not empty => a primary key is set
let primary_key = index.primary_key(&index_rtxn)?.unwrap();
let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
.map_err(milli::Error::from)
.map_err(err)?;
let mut index_wtxn = index.write_txn()?;
let mut indexer = indexer::DocumentDeletion::new();
indexer.delete_documents_by_docids(documents_to_delete);
let document_changes = indexer.into_changes(indexer_alloc, primary_key);
let embedders = index
.embedding_configs()
.embedding_configs(&index_wtxn)
.map_err(milli::Error::from)
.map_err(err)?;
let embedders = self.embedders(index_uid.to_string(), embedders)?;
let indexer_config = self.index_mapper.indexer_config();
let pool = &indexer_config.thread_pool;
indexer::index(
&mut index_wtxn,
index,
pool,
indexer_config.grenad_parameters(),
&fields_ids_map,
new_fields_ids_map,
None, // document deletion never changes primary key
&document_changes,
embedders,
&|| must_stop_processing.get(),
progress,
&EmbedderStats::default(),
)
.map_err(err)?;
// update stats
let mut mapper_wtxn = self.env.write_txn()?;
let stats = crate::index_mapper::IndexStats::new(index, &index_wtxn).map_err(err)?;
self.index_mapper.store_stats_of(&mut mapper_wtxn, index_uid, &stats)?;
index_wtxn.commit()?;
// update stats after committing changes to index
mapper_wtxn.commit()?;
Ok(())
}
#[cfg(unix)]
async fn assume_role_with_web_identity(
role_arn: &str,
web_identity_token_file: &std::path::Path,
) -> anyhow::Result<StsCredentials> {
use std::env::VarError;
let token = tokio::fs::read_to_string(web_identity_token_file)
.await
.map_err(|e| anyhow::anyhow!("Failed to read web identity token file: {e}"))?;
let duration: u32 =
match std::env::var("MEILI_EXPERIMENTAL_S3_WEB_IDENTITY_TOKEN_DURATION_SECONDS") {
Ok(s) => s.parse()?,
Err(VarError::NotPresent) => 3600,
Err(VarError::NotUnicode(e)) => {
anyhow::bail!("Invalid duration: {e:?}")
}
};
let form_data = [
("Action", "AssumeRoleWithWebIdentity"),
("Version", "2011-06-15"),
("RoleArn", role_arn),
("RoleSessionName", "meilisearch-snapshot-session"),
("WebIdentityToken", &token),
("DurationSeconds", &duration.to_string()),
];
let client = reqwest::Client::new();
let response = client
.post("https://sts.amazonaws.com/")
.header(reqwest::header::ACCEPT, "application/json")
.header(reqwest::header::CONTENT_TYPE, "application/x-www-form-urlencoded")
.form(&form_data)
.send()
.await
.map_err(|e| anyhow::anyhow!("Failed to send STS request: {e}"))?;
let status = response.status();
let body = response
.text()
.await
.map_err(|e| anyhow::anyhow!("Failed to read STS response body: {e}"))?;
if !status.is_success() {
return Err(anyhow::anyhow!("STS request failed with status {status}: {body}"));
}
let sts_response: StsResponse = serde_json::from_str(&body)
.map_err(|e| anyhow::anyhow!("Failed to deserialize STS response: {e}"))?;
Ok(sts_response.response.result.credentials)
}
#[cfg(unix)]
async fn extract_credentials_from_options(
s3_access_key: Option<String>,
s3_secret_key: Option<String>,
s3_role_arn: Option<String>,
s3_web_identity_token_file: Option<std::path::PathBuf>,
) -> anyhow::Result<(String, String, Option<String>)> {
let static_credentials = s3_access_key.zip(s3_secret_key);
let web_identity = s3_role_arn.zip(s3_web_identity_token_file);
match (static_credentials, web_identity) {
(Some((access_key, secret_key)), None) => Ok((access_key, secret_key, None)),
(None, Some((role_arn, token_file))) => {
let StsCredentials { access_key_id, secret_access_key, session_token } =
Self::assume_role_with_web_identity(&role_arn, &token_file).await?;
Ok((access_key_id, secret_access_key, Some(session_token)))
}
(_, _) => anyhow::bail!("Clap must pass valid auth parameters"),
}
}
#[cfg(unix)]
pub(super) async fn process_snapshot_to_s3(
&self,
progress: Progress,
opts: meilisearch_types::milli::update::S3SnapshotOptions,
mut tasks: Vec<Task>,
) -> Result<Vec<Task>> {
use meilisearch_types::milli::update::S3SnapshotOptions;
use std::ffi::OsStr;
let S3SnapshotOptions {
s3_bucket_url,
s3_bucket_region,
s3_bucket_name,
s3_snapshot_prefix,
s3_access_key,
s3_secret_key,
s3_role_arn,
s3_web_identity_token_file,
s3_max_in_flight_parts,
s3_compression_level: level,
s3_signature_duration,
s3_multipart_part_size,
} = opts;
let must_stop_processing = self.scheduler.must_stop_processing.clone();
let retry_backoff = backoff::ExponentialBackoff::default();
let db_name = {
let mut base_path = self.env.path().to_owned();
base_path.pop();
base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms").to_string()
};
let (reader, writer) = std::io::pipe()?;
let uploader_task = tokio::spawn(async move {
let (s3_access_key, s3_secret_key, s3_token) = Self::extract_credentials_from_options(
s3_access_key,
s3_secret_key,
s3_role_arn,
s3_web_identity_token_file,
)
.await?;
multipart_stream_to_s3(
s3_bucket_url,
s3_bucket_region,
s3_bucket_name,
s3_snapshot_prefix,
s3_access_key,
s3_secret_key,
s3_token,
s3_max_in_flight_parts,
s3_signature_duration,
s3_multipart_part_size,
must_stop_processing,
retry_backoff,
db_name,
reader,
)
.await
});
let index_scheduler = IndexScheduler::private_clone(self);
let builder_task = tokio::task::spawn_blocking(move || {
stream_tarball_into_pipe(progress, level, writer, index_scheduler)
});
let (uploader_result, builder_result) = tokio::join!(uploader_task, builder_task);
// Check the uploader result first so we can return early on task abortion.
// safety: JoinHandle can return an error if the task was aborted, cancelled, or panicked.
uploader_result.unwrap()?;
builder_result.unwrap()?;
for task in &mut tasks {
task.status = Status::Succeeded;
}
Ok(tasks)
}
}
#[cfg(unix)]
#[derive(Debug, Clone, serde::Deserialize)]
struct StsCredentials {
#[serde(rename = "AccessKeyId")]
access_key_id: String,
#[serde(rename = "SecretAccessKey")]
secret_access_key: String,
#[serde(rename = "SessionToken")]
session_token: String,
}
#[cfg(unix)]
#[derive(Debug, serde::Deserialize)]
struct AssumeRoleWithWebIdentityResult {
#[serde(rename = "Credentials")]
credentials: StsCredentials,
}
#[cfg(unix)]
#[derive(Debug, serde::Deserialize)]
struct AssumeRoleWithWebIdentityResponse {
#[serde(rename = "AssumeRoleWithWebIdentityResult")]
result: AssumeRoleWithWebIdentityResult,
}
#[cfg(unix)]
#[derive(Debug, serde::Deserialize)]
struct StsResponse {
#[serde(rename = "AssumeRoleWithWebIdentityResponse")]
response: AssumeRoleWithWebIdentityResponse,
}
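The three wrapper structs above mirror the nesting of the STS `AssumeRoleWithWebIdentity` JSON response. A small, abbreviated example of the shape they expect to deserialize (illustrative values; extra fields such as response metadata are simply ignored by serde):

fn parse_sts_example() -> Result<(), serde_json::Error> {
    let body = r#"{
        "AssumeRoleWithWebIdentityResponse": {
            "AssumeRoleWithWebIdentityResult": {
                "Credentials": {
                    "AccessKeyId": "ASIAEXAMPLE",
                    "SecretAccessKey": "example-secret",
                    "SessionToken": "example-token"
                }
            }
        }
    }"#;
    let parsed: StsResponse = serde_json::from_str(body)?;
    let _credentials = parsed.response.result.credentials;
    Ok(())
}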
/// Streams a tarball of the database content into a pipe.
#[cfg(unix)]
fn stream_tarball_into_pipe(
progress: Progress,
level: u32,
writer: std::io::PipeWriter,
index_scheduler: IndexScheduler,
) -> std::result::Result<(), Error> {
use std::io::Write as _;
use std::path::Path;
use std::sync::atomic::Ordering;
use meilisearch_types::milli::progress::VariableNameStep;
use meilisearch_types::VERSION_FILE_NAME;
use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
use crate::scheduler::process_snapshot_creation::UPDATE_FILES_DIR_NAME;
let writer = flate2::write::GzEncoder::new(writer, flate2::Compression::new(level));
let mut tarball = tar::Builder::new(writer);
// 1. Snapshot the version file
tarball
.append_path_with_name(&index_scheduler.scheduler.version_file_path, VERSION_FILE_NAME)?;
// 2. Snapshot the index scheduler LMDB env
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
let tasks_env_file = index_scheduler.env.try_clone_inner_file()?;
let path = Path::new("tasks").join("data.mdb");
append_file_to_tarball(&mut tarball, path, tasks_env_file)?;
// 2.3 Create a read transaction on the index-scheduler
let rtxn = index_scheduler.env.read_txn()?;
// 2.4 Create the update files directory
// And only copy the update files of the enqueued tasks
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
let enqueued = index_scheduler.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
progress.update_progress(update_file_progress);
// We create the update_files directory so that it
// always exists even if there are no update files
let update_files_dir = Path::new(UPDATE_FILES_DIR_NAME);
let src_update_files_dir = {
let mut path = index_scheduler.env.path().to_path_buf();
path.pop();
path.join(UPDATE_FILES_DIR_NAME)
};
tarball.append_dir(update_files_dir, src_update_files_dir)?;
for task_id in enqueued {
let task = index_scheduler
.queue
.tasks
.get_task(&rtxn, task_id)?
.ok_or(Error::CorruptedTaskQueue)?;
if let Some(content_uuid) = task.content_uuid() {
use std::fs::File;
let src = index_scheduler.queue.file_store.update_path(content_uuid);
let mut update_file = File::open(src)?;
let path = update_files_dir.join(content_uuid.to_string());
tarball.append_file(path, &mut update_file)?;
}
atomic.fetch_add(1, Ordering::Relaxed);
}
// 3. Snapshot every index
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
let index_mapping = index_scheduler.index_mapper.index_mapping;
let nb_indexes = index_mapping.len(&rtxn)? as u32;
let indexes_dir = Path::new("indexes");
let indexes_references: Vec<_> = index_scheduler
.index_mapper
.index_mapping
.iter(&rtxn)?
.map(|res| res.map_err(Error::from).map(|(name, uuid)| (name.to_string(), uuid)))
.collect::<Result<_, Error>>()?;
// It's prettier to use a for loop instead of the IndexMapper::try_for_each_index
// method, especially when we need to access the UUID, local path and index number.
for (i, (name, uuid)) in indexes_references.into_iter().enumerate() {
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
&name, i as u32, nb_indexes,
));
let path = indexes_dir.join(uuid.to_string()).join("data.mdb");
let index = index_scheduler.index_mapper.index(&rtxn, &name)?;
let index_file = index.try_clone_inner_file()?;
tracing::trace!("Appending index file for {name} in {}", path.display());
append_file_to_tarball(&mut tarball, path, index_file)?;
}
drop(rtxn);
// 4. Snapshot the auth LMDB env
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
let auth_env_file = index_scheduler.scheduler.auth_env.try_clone_inner_file()?;
let path = Path::new("auth").join("data.mdb");
append_file_to_tarball(&mut tarball, path, auth_env_file)?;
let mut gzencoder = tarball.into_inner()?;
gzencoder.flush()?;
gzencoder.try_finish()?;
let mut writer = gzencoder.finish()?;
writer.flush()?;
Result::<_, Error>::Ok(())
}
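`process_snapshot_to_s3` and `stream_tarball_into_pipe` cooperate through an in-process pipe: one task writes a gzip-compressed tarball into the writer end while the uploader consumes the reader end. A minimal, self-contained sketch of that plumbing, assuming the `flate2` and `tar` crates (error handling and progress reporting elided):

use std::io::{self, Read};

fn pipe_tarball_sketch() -> io::Result<()> {
    let (mut reader, writer) = io::pipe()?; // same std primitive as above (Rust 1.87+)

    let builder = std::thread::spawn(move || -> io::Result<()> {
        let gz = flate2::write::GzEncoder::new(writer, flate2::Compression::new(6));
        let mut tarball = tar::Builder::new(gz);
        // Any small file works for the sketch; the real code appends LMDB envs and update files.
        tarball.append_path_with_name("Cargo.toml", "snapshot/Cargo.toml")?;
        let mut gz = tarball.into_inner()?; // finish the tar stream
        gz.try_finish()?; // flush the gzip trailer
        Ok(())
    });

    // The consumer here just counts bytes; the real code streams them to S3 part by part.
    let mut sink = Vec::new();
    reader.read_to_end(&mut sink)?;
    builder.join().expect("builder thread panicked")?;
    println!("snapshot stream was {} bytes", sink.len());
    Ok(())
}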
#[cfg(unix)]
fn append_file_to_tarball<W, P>(
tarball: &mut tar::Builder<W>,
path: P,
mut auth_env_file: std::fs::File,
) -> Result<(), Error>
where
W: std::io::Write,
P: AsRef<std::path::Path>,
{
use std::io::{Seek as _, SeekFrom};
// Note: A previous snapshot operation may have left the cursor
// at the end of the file so we need to seek to the start.
auth_env_file.seek(SeekFrom::Start(0))?;
tarball.append_file(path, &mut auth_env_file)?;
Ok(())
}
/// Streams the content read from the given reader to S3.
#[allow(clippy::too_many_arguments)]
#[cfg(unix)]
async fn multipart_stream_to_s3(
s3_bucket_url: String,
s3_bucket_region: String,
s3_bucket_name: String,
s3_snapshot_prefix: String,
s3_access_key: String,
s3_secret_key: String,
s3_token: Option<String>,
s3_max_in_flight_parts: std::num::NonZero<usize>,
s3_signature_duration: std::time::Duration,
s3_multipart_part_size: u64,
must_stop_processing: super::MustStopProcessing,
retry_backoff: backoff::exponential::ExponentialBackoff<backoff::SystemClock>,
db_name: String,
reader: std::io::PipeReader,
) -> Result<(), Error> {
use std::collections::VecDeque;
use std::io;
use std::os::fd::OwnedFd;
use std::path::PathBuf;
use bytes::{Bytes, BytesMut};
use reqwest::{Client, Response};
use rusty_s3::actions::CreateMultipartUpload;
use rusty_s3::{Bucket, BucketError, Credentials, S3Action as _, UrlStyle};
use tokio::task::JoinHandle;
let reader = OwnedFd::from(reader);
let reader = tokio::net::unix::pipe::Receiver::from_owned_fd(reader)?;
let s3_snapshot_prefix = PathBuf::from(s3_snapshot_prefix);
let url =
s3_bucket_url.parse().map_err(BucketError::ParseError).map_err(Error::S3BucketError)?;
let bucket = Bucket::new(url, UrlStyle::Path, s3_bucket_name, s3_bucket_region)
.map_err(Error::S3BucketError)?;
let credential = match s3_token {
Some(token) => Credentials::new_with_token(s3_access_key, s3_secret_key, token),
None => Credentials::new(s3_access_key, s3_secret_key),
};
// Note for the future (rust 1.91+): use with_added_extension, it's prettier
let object_path = s3_snapshot_prefix.join(format!("{db_name}.snapshot"));
// Note: It doesn't work on Windows and if a port to this platform is needed,
// use the slash-path crate or similar to get the correct path separator.
let object = object_path.display().to_string();
let action = bucket.create_multipart_upload(Some(&credential), &object);
let url = action.sign(s3_signature_duration);
let client = Client::new();
let resp = client.post(url).send().await.map_err(Error::S3HttpError)?;
let status = resp.status();
let body = match resp.error_for_status_ref() {
Ok(_) => resp.text().await.map_err(Error::S3HttpError)?,
Err(_) => {
return Err(Error::S3Error { status, body: resp.text().await.unwrap_or_default() })
}
};
let multipart =
CreateMultipartUpload::parse_response(&body).map_err(|e| Error::S3XmlError(Box::new(e)))?;
tracing::debug!("Starting the upload of the snapshot to {object}");
// We use this bumpalo for the ETag strings.
let bump = bumpalo::Bump::new();
let mut etags = Vec::<&str>::new();
let mut in_flight = VecDeque::<(JoinHandle<reqwest::Result<Response>>, Bytes)>::with_capacity(
s3_max_in_flight_parts.get(),
);
// Part numbers start at 1 and cannot be larger than 10k
for part_number in 1u16.. {
if must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let part_upload =
bucket.upload_part(Some(&credential), &object, part_number, multipart.upload_id());
let url = part_upload.sign(s3_signature_duration);
// Wait for a buffer to be ready if there are in-flight parts that landed
let mut buffer = if in_flight.len() >= s3_max_in_flight_parts.get() {
let (handle, buffer) = in_flight.pop_front().expect("At least one in flight request");
let resp = join_and_map_error(handle).await?;
extract_and_append_etag(&bump, &mut etags, resp.headers())?;
let mut buffer = match buffer.try_into_mut() {
Ok(buffer) => buffer,
Err(_) => unreachable!("All bytes references were consumed in the task"),
};
buffer.clear();
buffer
} else {
BytesMut::with_capacity(s3_multipart_part_size as usize)
};
// If we successfully read enough bytes,
// we can continue and send the buffer/part
while buffer.len() < (s3_multipart_part_size as usize / 2) {
// Wait for the pipe to be readable
reader.readable().await?;
match reader.try_read_buf(&mut buffer) {
Ok(0) => break,
// We read some bytes but maybe not enough
Ok(_) => continue,
// The readiness event is a false positive.
Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => continue,
Err(e) => return Err(e.into()),
}
}
if buffer.is_empty() {
// Break the loop if the buffer is
// empty after we tried to read bytes
break;
}
let body = buffer.freeze();
tracing::trace!("Sending part {part_number}");
let task = tokio::spawn({
let client = client.clone();
let body = body.clone();
backoff::future::retry(retry_backoff.clone(), move || {
let client = client.clone();
let url = url.clone();
let body = body.clone();
async move {
match client.put(url).body(body).send().await {
Ok(resp) if resp.status().is_client_error() => {
resp.error_for_status().map_err(backoff::Error::Permanent)
}
Ok(resp) => Ok(resp),
Err(e) => Err(backoff::Error::transient(e)),
}
}
})
});
in_flight.push_back((task, body));
}
for (handle, _buffer) in in_flight {
let resp = join_and_map_error(handle).await?;
extract_and_append_etag(&bump, &mut etags, resp.headers())?;
}
tracing::debug!("Finalizing the multipart upload");
let action = bucket.complete_multipart_upload(
Some(&credential),
&object,
multipart.upload_id(),
etags.iter().map(AsRef::as_ref),
);
let url = action.sign(s3_signature_duration);
let body = action.body();
let resp = backoff::future::retry(retry_backoff, move || {
let client = client.clone();
let url = url.clone();
let body = body.clone();
async move {
match client.post(url).body(body).send().await {
Ok(resp) if resp.status().is_client_error() => {
Err(backoff::Error::Permanent(Error::S3Error {
status: resp.status(),
body: resp.text().await.unwrap_or_default(),
}))
}
Ok(resp) => Ok(resp),
Err(e) => Err(backoff::Error::transient(Error::S3HttpError(e))),
}
}
})
.await?;
let status = resp.status();
let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;
if status.is_success() {
Ok(())
} else {
Err(Error::S3Error { status, body })
}
}
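The upload loop above caps concurrency with a `VecDeque` of in-flight parts: once the window is full, a new part is only spawned after the oldest one completes, and the finished parts' ETags are collected in order for the final `CompleteMultipartUpload`. A condensed sketch of that pattern with the S3 specifics replaced by a dummy upload (a Tokio runtime is assumed):

use std::collections::VecDeque;
use tokio::task::JoinHandle;

async fn upload_part(part_number: u16, _body: Vec<u8>) -> String {
    // Stand-in for the signed PUT request; returns the part's ETag.
    format!("etag-{part_number}")
}

async fn upload_all(parts: Vec<Vec<u8>>, max_in_flight: usize) -> Vec<String> {
    let mut in_flight: VecDeque<JoinHandle<String>> = VecDeque::with_capacity(max_in_flight);
    let mut etags = Vec::new();
    for (i, body) in parts.into_iter().enumerate() {
        // Once the window is full, wait for the oldest part before spawning a new one.
        if in_flight.len() >= max_in_flight {
            let oldest = in_flight.pop_front().expect("at least one in-flight part");
            etags.push(oldest.await.expect("upload task panicked"));
        }
        in_flight.push_back(tokio::spawn(upload_part((i + 1) as u16, body)));
    }
    // Drain the remaining parts in order so the ETag list matches the part numbers.
    for handle in in_flight {
        etags.push(handle.await.expect("upload task panicked"));
    }
    etags
}

The real loop additionally reuses the part buffers (via `Bytes::try_into_mut`) and wraps each request in an exponential-backoff retry that treats 4xx responses as permanent failures.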
#[cfg(unix)]
async fn join_and_map_error(
join_handle: tokio::task::JoinHandle<Result<reqwest::Response, reqwest::Error>>,
) -> Result<reqwest::Response> {
// safety: Panic happens if the task (JoinHandle) was aborted, cancelled, or panicked
let request = join_handle.await.unwrap();
let resp = request.map_err(Error::S3HttpError)?;
match resp.error_for_status_ref() {
Ok(_) => Ok(resp),
Err(_) => Err(Error::S3Error {
status: resp.status(),
body: resp.text().await.unwrap_or_default(),
}),
}
}
#[cfg(unix)]
fn extract_and_append_etag<'b>(
bump: &'b bumpalo::Bump,
etags: &mut Vec<&'b str>,
headers: &reqwest::header::HeaderMap,
) -> Result<()> {
use reqwest::header::ETAG;
let etag = headers.get(ETAG).ok_or_else(|| Error::S3XmlError("Missing ETag header".into()))?;
let etag = etag.to_str().map_err(|e| Error::S3XmlError(Box::new(e)))?;
etags.push(bump.alloc_str(etag));
Ok(())
}

View File

@@ -1,12 +1,7 @@
mod autobatcher; mod autobatcher;
#[cfg(test)] #[cfg(test)]
mod autobatcher_test; mod autobatcher_test;
#[cfg(not(feature = "enterprise"))]
mod community_edition;
mod create_batch; mod create_batch;
#[cfg(feature = "enterprise")]
mod enterprise_edition;
mod process_batch; mod process_batch;
mod process_dump_creation; mod process_dump_creation;
mod process_export; mod process_export;
@@ -26,10 +21,10 @@ use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::Arc; use std::sync::Arc;
use convert_case::{Case, Casing as _};
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls}; use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli; use meilisearch_types::milli;
use meilisearch_types::milli::update::S3SnapshotOptions;
use meilisearch_types::tasks::Status; use meilisearch_types::tasks::Status;
use process_batch::ProcessBatchInfo; use process_batch::ProcessBatchInfo;
use rayon::current_num_threads; use rayon::current_num_threads;
@@ -92,14 +87,11 @@ pub struct Scheduler {
/// Snapshot compaction status. /// Snapshot compaction status.
pub(crate) experimental_no_snapshot_compaction: bool, pub(crate) experimental_no_snapshot_compaction: bool,
/// S3 Snapshot options.
pub(crate) s3_snapshot_options: Option<S3SnapshotOptions>,
} }
impl Scheduler { impl Scheduler {
pub(crate) fn private_clone(&self) -> Self { pub(crate) fn private_clone(&self) -> Scheduler {
Self { Scheduler {
must_stop_processing: self.must_stop_processing.clone(), must_stop_processing: self.must_stop_processing.clone(),
wake_up: self.wake_up.clone(), wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled, autobatching_enabled: self.autobatching_enabled,
@@ -111,53 +103,23 @@ impl Scheduler {
version_file_path: self.version_file_path.clone(), version_file_path: self.version_file_path.clone(),
embedding_cache_cap: self.embedding_cache_cap, embedding_cache_cap: self.embedding_cache_cap,
experimental_no_snapshot_compaction: self.experimental_no_snapshot_compaction, experimental_no_snapshot_compaction: self.experimental_no_snapshot_compaction,
s3_snapshot_options: self.s3_snapshot_options.clone(),
} }
} }
pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler { pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
let IndexSchedulerOptions {
version_file_path,
auth_path: _,
tasks_path: _,
update_file_path: _,
indexes_path: _,
snapshots_path,
dumps_path,
cli_webhook_url: _,
cli_webhook_authorization: _,
task_db_size: _,
index_base_map_size: _,
enable_mdb_writemap: _,
index_growth_amount: _,
index_count: _,
indexer_config,
autobatching_enabled,
cleanup_enabled: _,
max_number_of_tasks: _,
max_number_of_batched_tasks,
batched_tasks_size_limit,
export_default_payload_size_bytes: _,
instance_features: _,
auto_upgrade: _,
embedding_cache_cap,
experimental_no_snapshot_compaction,
} = options;
Scheduler { Scheduler {
must_stop_processing: MustStopProcessing::default(), must_stop_processing: MustStopProcessing::default(),
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)), wake_up: Arc::new(SignalEvent::auto(true)),
autobatching_enabled: *autobatching_enabled, autobatching_enabled: options.autobatching_enabled,
max_number_of_batched_tasks: *max_number_of_batched_tasks, max_number_of_batched_tasks: options.max_number_of_batched_tasks,
batched_tasks_size_limit: *batched_tasks_size_limit, batched_tasks_size_limit: options.batched_tasks_size_limit,
dumps_path: dumps_path.clone(), dumps_path: options.dumps_path.clone(),
snapshots_path: snapshots_path.clone(), snapshots_path: options.snapshots_path.clone(),
auth_env, auth_env,
version_file_path: version_file_path.clone(), version_file_path: options.version_file_path.clone(),
embedding_cache_cap: *embedding_cache_cap, embedding_cache_cap: options.embedding_cache_cap,
experimental_no_snapshot_compaction: *experimental_no_snapshot_compaction, experimental_no_snapshot_compaction: options.experimental_no_snapshot_compaction,
s3_snapshot_options: indexer_config.s3_snapshot_options.clone(),
} }
} }
} }
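One side of the `Scheduler::new` hunk above destructures `IndexSchedulerOptions` field by field, naming even the unused fields as `_`. The point of that style is compile-time exhaustiveness: adding a field to the options struct forces this constructor to be revisited. A tiny illustration with a made-up struct:

struct Options {
    dumps_path: std::path::PathBuf,
    autobatching_enabled: bool,
    cleanup_enabled: bool,
}

fn build(options: &Options) -> bool {
    // If `Options` gains a new field, this pattern stops compiling until the
    // field is either used here or explicitly ignored.
    let Options { dumps_path: _, autobatching_enabled, cleanup_enabled: _ } = options;
    *autobatching_enabled
}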
@@ -183,8 +145,6 @@ impl IndexScheduler {
self.breakpoint(crate::test_utils::Breakpoint::Start); self.breakpoint(crate::test_utils::Breakpoint::Start);
} }
let previous_processing_batch = self.processing_tasks.write().unwrap().stop_processing();
if self.cleanup_enabled { if self.cleanup_enabled {
let mut wtxn = self.env.write_txn()?; let mut wtxn = self.env.write_txn()?;
self.queue.cleanup_task_queue(&mut wtxn)?; self.queue.cleanup_task_queue(&mut wtxn)?;
@@ -192,15 +152,10 @@ impl IndexScheduler {
} }
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
let (batch, mut processing_batch) = match self let (batch, mut processing_batch) =
.create_next_batch(&rtxn, &previous_processing_batch.processing) match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
.map_err(|e| Error::CreateBatch(Box::new(e)))?
{
Some(batch) => batch, Some(batch) => batch,
None => { None => return Ok(TickOutcome::WaitForSignal),
*self.processing_tasks.write().unwrap() = previous_processing_batch;
return Ok(TickOutcome::WaitForSignal);
}
}; };
let index_uid = batch.index_uid().map(ToOwned::to_owned); let index_uid = batch.index_uid().map(ToOwned::to_owned);
drop(rtxn); drop(rtxn);
@@ -231,12 +186,7 @@ impl IndexScheduler {
let handle = std::thread::Builder::new() let handle = std::thread::Builder::new()
.name(String::from("batch-operation")) .name(String::from("batch-operation"))
.spawn_scoped(s, move || { .spawn_scoped(s, move || {
cloned_index_scheduler.process_batch( cloned_index_scheduler.process_batch(batch, processing_batch, p)
batch,
processing_batch,
p,
&self.network(),
)
}) })
.unwrap(); .unwrap();
@@ -277,14 +227,7 @@ impl IndexScheduler {
self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?; self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?;
progress.update_progress(BatchProgress::WritingTasksToDisk); progress.update_progress(BatchProgress::WritingTasksToDisk);
processing_batch.finished(); processing_batch.finished();
// whether the batch made progress.
// a batch makes progress if it failed or if it contains at least one fully processed (or cancelled) task.
//
// if a batch did not make progress, it means that all of its tasks are waiting on the scheduler to make progress,
// and so we must wait for new tasks. Such a batch is not persisted to DB, and is resumed on the next tick.
let mut batch_made_progress = false;
let mut stop_scheduler_forever = false; let mut stop_scheduler_forever = false;
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new(); let mut canceled = RoaringBitmap::new();
@@ -305,11 +248,7 @@ impl IndexScheduler {
#[allow(unused_variables)] #[allow(unused_variables)]
for (i, mut task) in tasks.into_iter().enumerate() { for (i, mut task) in tasks.into_iter().enumerate() {
task_progress.fetch_add(1, Ordering::Relaxed); task_progress.fetch_add(1, Ordering::Relaxed);
processing_batch.update_from_task(&task); processing_batch.update(&mut task);
if !matches!(task.status, Status::Processing | Status::Enqueued) {
batch_made_progress = true;
processing_batch.finish_task(&mut task);
}
if task.status == Status::Canceled { if task.status == Status::Canceled {
canceled.insert(task.uid); canceled.insert(task.uid);
canceled_by = task.canceled_by; canceled_by = task.canceled_by;
@@ -376,9 +315,6 @@ impl IndexScheduler {
} }
// In case of a failure we must get back and patch all the tasks with the error. // In case of a failure we must get back and patch all the tasks with the error.
Err(err) => { Err(err) => {
// always persist failed batches
batch_made_progress = true;
#[cfg(test)] #[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed); self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32); let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
@@ -402,10 +338,7 @@ impl IndexScheduler {
task.status = Status::Failed; task.status = Status::Failed;
task.error = Some(error.clone()); task.error = Some(error.clone());
task.details = task.details.map(|d| d.to_failed()); task.details = task.details.map(|d| d.to_failed());
processing_batch.update_from_task(&task); processing_batch.update(&mut task);
if !matches!(task.status, Status::Processing | Status::Enqueued) {
processing_batch.finish_task(&mut task);
}
#[cfg(test)] #[cfg(test)]
self.maybe_fail( self.maybe_fail(
@@ -428,12 +361,44 @@ impl IndexScheduler {
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } = let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
process_batch_info; process_batch_info;
processing_batch.write_stats( processing_batch.stats.progress_trace =
&progress, progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
congestion, processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
pre_commit_dabases_sizes, let mut congestion_info = serde_json::Map::new();
post_commit_dabases_sizes, congestion_info.insert("attempts".into(), congestion.attempts.into());
); congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
congestion_info
});
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
.iter()
.flat_map(|(dbname, pre_size)| {
post_commit_dabases_sizes
.get(dbname)
.map(|post_size| {
use std::cmp::Ordering::{Equal, Greater, Less};
use byte_unit::Byte;
use byte_unit::UnitType::Binary;
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
let diff_size = post_size.abs_diff(*pre_size) as u64;
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
let sign = match post_size.cmp(pre_size) {
Equal => return None,
Greater => "+",
Less => "-",
};
Some((
dbname.to_case(Case::Camel),
format!("{post:#.2} ({sign}{diff:#.2})").into(),
))
})
.into_iter()
.flatten()
})
.collect();
if let Some(congestion) = congestion { if let Some(congestion) = congestion {
tracing::debug!( tracing::debug!(
@@ -446,16 +411,13 @@ impl IndexScheduler {
tracing::debug!("call trace: {:?}", progress.accumulated_durations()); tracing::debug!("call trace: {:?}", progress.accumulated_durations());
if batch_made_progress {
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?; self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
}
#[cfg(test)] #[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?; self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?;
wtxn.commit().map_err(Error::HeedTransaction)?; wtxn.commit().map_err(Error::HeedTransaction)?;
if batch_made_progress {
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task // We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
// and then become « not found » for some time until everything is written and the final commit is made. // and then become « not found » for some time until everything is written and the final commit is made.
self.processing_tasks.write().unwrap().stop_processing(); self.processing_tasks.write().unwrap().stop_processing();
@@ -485,10 +447,10 @@ impl IndexScheduler {
})?; })?;
self.notify_webhooks(ids); self.notify_webhooks(ids);
}
#[cfg(test)] #[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing); self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);
if stop_scheduler_forever { if stop_scheduler_forever {
Ok(TickOutcome::StopProcessingForever) Ok(TickOutcome::StopProcessingForever)
} else { } else {

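One side of the hunk above computes the `internalDatabaseSizes` stat inline: for each database it compares the size before and after the commit and pretty-prints the signed delta with `byte_unit`. A small self-contained sketch of that formatting step, with plain `u64` byte counts standing in for the scheduler's size maps (the function name is illustrative, not part of the codebase):

    use byte_unit::{Byte, UnitType};
    use std::cmp::Ordering::{Equal, Greater, Less};

    // Render the post-commit size together with its signed delta,
    // returning None when the size did not change (mirroring the stat above).
    fn size_delta(pre_size: u64, post_size: u64) -> Option<String> {
        let sign = match post_size.cmp(&pre_size) {
            Equal => return None,
            Greater => "+",
            Less => "-",
        };
        let post = Byte::from_u64(post_size).get_appropriate_unit(UnitType::Binary);
        let diff = Byte::from_u64(post_size.abs_diff(pre_size)).get_appropriate_unit(UnitType::Binary);
        Some(format!("{post:#.2} ({sign}{diff:#.2})"))
    }

    fn main() {
        // Something along the lines of Some("1.00 MiB (+47.44 KiB)");
        // the exact rendering depends on the byte_unit version in use.
        println!("{:?}", size_delta(1_000_000, 1_048_576));
    }
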
View File

@@ -10,7 +10,6 @@ use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::progress::{Progress, VariableNameStep}; use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::{self, ChannelCongestion}; use meilisearch_types::milli::{self, ChannelCongestion};
use meilisearch_types::network::Network;
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use milli::update::Settings as MilliSettings; use milli::update::Settings as MilliSettings;
@@ -56,7 +55,6 @@ impl IndexScheduler {
batch: Batch, batch: Batch,
current_batch: &mut ProcessingBatch, current_batch: &mut ProcessingBatch,
progress: Progress, progress: Progress,
network: &Network,
) -> Result<(Vec<Task>, ProcessBatchInfo)> { ) -> Result<(Vec<Task>, ProcessBatchInfo)> {
#[cfg(test)] #[cfg(test)]
{ {
@@ -178,7 +176,6 @@ impl IndexScheduler {
op, op,
&progress, &progress,
current_batch.embedder_stats.clone(), current_batch.embedder_stats.clone(),
network,
)?; )?;
{ {
@@ -238,7 +235,6 @@ impl IndexScheduler {
Batch::IndexUpdate { index_uid, primary_key, new_index_uid: None, task }, Batch::IndexUpdate { index_uid, primary_key, new_index_uid: None, task },
current_batch, current_batch,
progress, progress,
network,
) )
} }
Batch::IndexUpdate { index_uid, primary_key, new_index_uid, mut task } => { Batch::IndexUpdate { index_uid, primary_key, new_index_uid, mut task } => {
@@ -543,10 +539,6 @@ impl IndexScheduler {
Ok((tasks, ProcessBatchInfo::default())) Ok((tasks, ProcessBatchInfo::default()))
} }
Batch::NetworkIndexBatch { network_task, inner_batch } => {
self.process_network_index_batch(network_task, inner_batch, current_batch, progress)
}
Batch::NetworkReady { task } => self.process_network_ready(task, progress),
} }
} }

View File

@@ -1,6 +1,5 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::io::{self, Write as _}; use std::io::{self, Write as _};
use std::ops::ControlFlow;
use std::sync::atomic; use std::sync::atomic;
use std::time::Duration; use std::time::Duration;
@@ -8,7 +7,6 @@ use backoff::ExponentialBackoff;
use byte_unit::Byte; use byte_unit::Byte;
use flate2::write::GzEncoder; use flate2::write::GzEncoder;
use flate2::Compression; use flate2::Compression;
use meilisearch_types::error::Code;
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata; use meilisearch_types::milli::index::EmbeddingsWithMetadata;
@@ -17,10 +15,7 @@ use meilisearch_types::milli::update::{request_threads, Setting};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError}; use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
use meilisearch_types::settings::{self, SecretPolicy}; use meilisearch_types::settings::{self, SecretPolicy};
use meilisearch_types::tasks::network::headers::SetHeader as _;
use meilisearch_types::tasks::network::{headers, ImportData, ImportMetadata, Origin};
use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings}; use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
use roaring::RoaringBitmap;
use serde::Deserialize; use serde::Deserialize;
use ureq::{json, Response}; use ureq::{json, Response};
@@ -55,7 +50,6 @@ impl IndexScheduler {
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build(); let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
let must_stop_processing = self.scheduler.must_stop_processing.clone(); let must_stop_processing = self.scheduler.must_stop_processing.clone();
for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() { for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
let err = |err| Error::from_milli(err, Some(uid.to_string()));
if must_stop_processing.get() { if must_stop_processing.get() {
return Err(Error::AbortedTask); return Err(Error::AbortedTask);
} }
@@ -67,62 +61,14 @@ impl IndexScheduler {
)); ));
let ExportIndexSettings { filter, override_settings } = export_settings; let ExportIndexSettings { filter, override_settings } = export_settings;
let index = self.index(uid)?; let index = self.index(uid)?;
let index_rtxn = index.read_txn()?; let index_rtxn = index.read_txn()?;
let filter = filter.as_ref().map(Filter::from_json).transpose().map_err(err)?.flatten(); let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
let filter_universe =
filter.map(|f| f.evaluate(&index_rtxn, &index)).transpose().map_err(err)?;
let whole_universe =
index.documents_ids(&index_rtxn).map_err(milli::Error::from).map_err(err)?;
let universe = filter_universe.unwrap_or(whole_universe);
let target = TargetInstance { remote_name: None, base_url, api_key };
let ctx = ExportContext {
index: &index,
index_rtxn: &index_rtxn,
universe: &universe,
progress: &progress,
agent: &agent,
must_stop_processing: &must_stop_processing,
};
let options = ExportOptions {
index_uid: uid,
payload_size,
override_settings: *override_settings,
export_mode: ExportMode::ExportRoute,
};
let total_documents = self.export_one_index(target, options, ctx)?;
output.insert( // First, check if the index already exists
IndexUidPattern::new_unchecked(uid.clone()), let url = format!("{base_url}/indexes/{uid}");
DetailsExportIndexSettings { let response = retry(&must_stop_processing, || {
settings: (*export_settings).clone(), let mut request = agent.get(&url);
matched_documents: Some(total_documents),
},
);
}
Ok(output)
}
pub(super) fn export_one_index(
&self,
target: TargetInstance<'_>,
options: ExportOptions<'_>,
ctx: ExportContext<'_>,
) -> Result<u64, Error> {
let err = |err| Error::from_milli(err, Some(options.index_uid.to_string()));
let total_index_documents = ctx.universe.len();
let task_network = options.task_network(total_index_documents);
let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}"));
let url = format!(
"{base_url}/indexes/{index_uid}",
base_url = target.base_url,
index_uid = options.index_uid
);
let response = retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.get(&url);
if let Some(bearer) = &bearer { if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer); request = request.set("Authorization", bearer);
} }
@@ -131,146 +77,126 @@ impl IndexScheduler {
}); });
let index_exists = match response { let index_exists = match response {
Ok(response) => response.status() == 200, Ok(response) => response.status() == 200,
Err(Error::FromRemoteWhenExporting { code, .. }) Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
if code == Code::IndexNotFound.name() =>
{
false false
} }
Err(e) => return Err(e), Err(e) => return Err(e),
}; };
let primary_key =
ctx.index.primary_key(ctx.index_rtxn).map_err(milli::Error::from).map_err(err)?; let primary_key = index
.primary_key(&index_rtxn)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
// Create the index
if !index_exists { if !index_exists {
let url = format!("{base_url}/indexes", base_url = target.base_url); let url = format!("{base_url}/indexes");
let _ = handle_response( retry(&must_stop_processing, || {
target.remote_name, let mut request = agent.post(&url);
retry(ctx.must_stop_processing, || { if let Some(bearer) = &bearer {
let mut request = ctx.agent.post(&url);
if let Some((import_data, origin, metadata)) = &task_network {
request = set_network_ureq_headers(request, import_data, origin, metadata);
}
if let Some(bearer) = bearer.as_ref() {
request = request.set("Authorization", bearer); request = request.set("Authorization", bearer);
} }
let index_param = let index_param = json!({ "uid": uid, "primaryKey": primary_key });
json!({ "uid": options.index_uid, "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error) request.send_json(&index_param).map_err(into_backoff_error)
}), })?;
)?;
}
if index_exists && options.override_settings {
let _ = handle_response(
target.remote_name,
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.patch(&url);
if let Some((import_data, origin, metadata)) = &task_network {
request = set_network_ureq_headers(request, import_data, origin, metadata);
} }
// Patch the index primary key
if index_exists && *override_settings {
let url = format!("{base_url}/indexes/{uid}");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = &bearer { if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer); request = request.set("Authorization", bearer);
} }
let index_param = json!({ "primaryKey": primary_key }); let index_param = json!({ "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error) request.send_json(&index_param).map_err(into_backoff_error)
}), })?;
)?;
} }
if !index_exists || options.override_settings {
// Send the index settings
if !index_exists || *override_settings {
let mut settings = let mut settings =
settings::settings(ctx.index, ctx.index_rtxn, SecretPolicy::RevealSecrets) settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
.map_err(err)?; .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// Remove the experimental chat setting if not enabled // Remove the experimental chat setting if not enabled
if self.features().check_chat_completions("exporting chat settings").is_err() { if self.features().check_chat_completions("exporting chat settings").is_err() {
settings.chat = Setting::NotSet; settings.chat = Setting::NotSet;
} }
// Retry logic for sending settings // Retry logic for sending settings
let url = format!( let url = format!("{base_url}/indexes/{uid}/settings");
"{base_url}/indexes/{index_uid}/settings", retry(&must_stop_processing, || {
base_url = target.base_url, let mut request = agent.patch(&url);
index_uid = options.index_uid
);
let _ = handle_response(
target.remote_name,
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.patch(&url);
if let Some((import_data, origin, metadata)) = &task_network {
request = set_network_ureq_headers(request, import_data, origin, metadata);
}
if let Some(bearer) = bearer.as_ref() { if let Some(bearer) = bearer.as_ref() {
request = request.set("Authorization", bearer); request = request.set("Authorization", bearer);
} }
request.send_json(settings.clone()).map_err(into_backoff_error) request.send_json(settings.clone()).map_err(into_backoff_error)
}), })?;
)?;
} }
let fields_ids_map = ctx.index.fields_ids_map(ctx.index_rtxn)?; let filter = filter
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); .as_ref()
let total_documents = ctx.universe.len() as u32; .map(Filter::from_json)
let (step, progress_step) = AtomicDocumentStep::new(total_documents); .transpose()
ctx.progress.update_progress(progress_step); .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
.flatten();
let limit = options let filter_universe = filter
.payload_size .map(|f| f.evaluate(&index_rtxn, &index))
.map(|ps| ps.as_u64() as usize) .transpose()
.unwrap_or(self.export_default_payload_size_bytes.as_u64() as usize); .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let documents_url = format!( let whole_universe = index
"{base_url}/indexes/{index_uid}/documents", .documents_ids(&index_rtxn)
base_url = target.base_url, .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
index_uid = options.index_uid let universe = filter_universe.unwrap_or(whole_universe);
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// We don't need to keep this one alive as we will
// spawn many threads to process the documents
drop(index_rtxn);
let total_documents = universe.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
output.insert(
IndexUidPattern::new_unchecked(uid.clone()),
DetailsExportIndexSettings {
settings: (*export_settings).clone(),
matched_documents: Some(total_documents as u64),
},
); );
// no document to send, but we must still send a task when performing network balancing let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
if ctx.universe.is_empty() { let documents_url = format!("{base_url}/indexes/{uid}/documents");
if let Some((import_data, network_change_origin, metadata)) = task_network {
let mut compressed_buffer = Vec::new();
// ignore control flow, we're returning anyway
let _ = send_buffer(
b" ", // needs something otherwise meili complains about missing payload
&mut compressed_buffer,
ctx.must_stop_processing,
ctx.agent,
&documents_url,
target.remote_name,
bearer.as_deref(),
Some(&(import_data, network_change_origin.clone(), metadata)),
&err,
)?;
}
return Ok(0);
}
let results = request_threads() let results = request_threads()
.broadcast(|broadcast| { .broadcast(|ctx| {
let mut task_network = options.task_network(total_index_documents); let index_rtxn = index
.read_txn()
let index_rtxn = ctx.index.read_txn().map_err(milli::Error::from).map_err(err)?; .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
let mut buffer = Vec::new(); let mut buffer = Vec::new();
let mut tmp_buffer = Vec::new(); let mut tmp_buffer = Vec::new();
let mut compressed_buffer = Vec::new(); let mut compressed_buffer = Vec::new();
for (i, docid) in ctx.universe.iter().enumerate() { for (i, docid) in universe.iter().enumerate() {
if i % broadcast.num_threads() != broadcast.index() { if i % ctx.num_threads() != ctx.index() {
continue; continue;
} }
if let Some((import_data, _, metadata)) = &mut task_network {
import_data.document_count += 1;
metadata.task_key = Some(docid);
}
let document = ctx.index.document(&index_rtxn, docid).map_err(err)?; let document = index
.document(&index_rtxn, docid)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let mut document = let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
obkv_to_json(&all_fields, &fields_ids_map, document).map_err(err)?; .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// TODO definitely factorize this code // TODO definitely factorize this code
'inject_vectors: { 'inject_vectors: {
let embeddings = ctx.index.embeddings(&index_rtxn, docid).map_err(err)?; let embeddings = index
.embeddings(&index_rtxn, docid)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
if embeddings.is_empty() { if embeddings.is_empty() {
break 'inject_vectors; break 'inject_vectors;
@@ -281,12 +207,15 @@ impl IndexScheduler {
.or_insert(serde_json::Value::Object(Default::default())); .or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else { let serde_json::Value::Object(vectors) = vectors else {
return Err(err(milli::Error::UserError( return Err(Error::from_milli(
milli::Error::UserError(
milli::UserError::InvalidVectorsMapType { milli::UserError::InvalidVectorsMapType {
document_id: { document_id: {
if let Ok(Some(Ok(index))) = ctx if let Ok(Some(Ok(index))) = index
.index .external_id_of(
.external_id_of(&index_rtxn, std::iter::once(docid)) &index_rtxn,
std::iter::once(docid),
)
.map(|it| it.into_iter().next()) .map(|it| it.into_iter().next())
{ {
index index
@@ -296,7 +225,9 @@ impl IndexScheduler {
}, },
value: vectors.clone(), value: vectors.clone(),
}, },
))); ),
Some(uid.to_string()),
));
}; };
for ( for (
@@ -305,9 +236,9 @@ impl IndexScheduler {
) in embeddings ) in embeddings
{ {
let embeddings = ExplicitVectors { let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( embeddings: Some(
embeddings, VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
)), ),
regenerate: regenerate && regenerate: regenerate &&
// Meilisearch does not handle well dumps with fragments, because as the fragments // Meilisearch does not handle well dumps with fragments, because as the fragments
// are marked as user-provided, // are marked as user-provided,
@@ -315,40 +246,42 @@ impl IndexScheduler {
// To prevent this, we mark embeddings as non-regenerate in this case. // To prevent this, we mark embeddings as non-regenerate in this case.
!has_fragments, !has_fragments,
}; };
vectors vectors.insert(
.insert(embedder_name, serde_json::to_value(embeddings).unwrap()); embedder_name,
serde_json::to_value(embeddings).unwrap(),
);
} }
} }
tmp_buffer.clear(); tmp_buffer.clear();
serde_json::to_writer(&mut tmp_buffer, &document) serde_json::to_writer(&mut tmp_buffer, &document)
.map_err(milli::InternalError::from) .map_err(milli::InternalError::from)
.map_err(milli::Error::from) .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
.map_err(err)?;
// Make sure we put at least one document in the buffer even // Make sure we put at least one document in the buffer even
// though we might go above the buffer limit before sending // though we might go above the buffer limit before sending
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit { if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
let control_flow = send_buffer( // We compress the documents before sending them
&buffer, let mut encoder =
&mut compressed_buffer, GzEncoder::new(&mut compressed_buffer, Compression::default());
ctx.must_stop_processing, encoder
ctx.agent, .write_all(&buffer)
&documents_url, .map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
target.remote_name, encoder
bearer.as_deref(), .finish()
task_network.as_ref(), .map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
&err,
)?; retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
})?;
buffer.clear(); buffer.clear();
compressed_buffer.clear(); compressed_buffer.clear();
if let Some((import_data, _, metadata)) = &mut task_network {
import_data.document_count = 0;
metadata.task_key = None;
}
if control_flow.is_break() {
return Ok(());
}
} }
buffer.extend_from_slice(&tmp_buffer); buffer.extend_from_slice(&tmp_buffer);
@@ -357,183 +290,31 @@ impl IndexScheduler {
} }
} }
// send the last buffered documents if any retry(&must_stop_processing, || {
if !buffer.is_empty() { let mut request = agent.post(&documents_url);
// ignore control flow here request = request.set("Content-Type", "application/x-ndjson");
let _ = send_buffer(
&buffer,
&mut compressed_buffer,
ctx.must_stop_processing,
ctx.agent,
&documents_url,
target.remote_name,
bearer.as_deref(),
task_network.as_ref(),
&err,
)?;
}
Ok(())
})
.map_err(|e| err(milli::Error::InternalError(InternalError::PanicInThreadPool(e))))?;
for result in results {
result?;
}
step.store(total_documents, atomic::Ordering::Relaxed);
Ok(total_documents as u64)
}
#[cfg(feature = "enterprise")] // only used in enterprise edition for now
pub(super) fn export_no_index(
&self,
target: TargetInstance<'_>,
export_old_remote_name: &str,
network_change_origin: &Origin,
agent: &ureq::Agent,
must_stop_processing: &MustStopProcessing,
) -> Result<(), Error> {
let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}"));
let url = format!("{base_url}/network", base_url = target.base_url,);
{
let _ = handle_response(
target.remote_name,
retry(must_stop_processing, || {
let request = agent.patch(&url);
let mut request = set_network_ureq_headers(
request,
&ImportData {
remote_name: export_old_remote_name.to_string(),
index_name: None,
document_count: 0,
},
network_change_origin,
&ImportMetadata {
index_count: 0,
task_key: None,
total_index_documents: 0,
},
);
request = request.set("Content-Type", "application/json");
if let Some(bearer) = &bearer { if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer); request = request.set("Authorization", bearer);
} }
request request.send_bytes(&buffer).map_err(into_backoff_error)
.send_json( })?;
// empty payload that will be disregarded
serde_json::Value::Object(Default::default()),
)
.map_err(into_backoff_error)
}),
)?;
}
Ok(()) Ok(())
})
.map_err(|e| {
Error::from_milli(
milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
Some(uid.to_string()),
)
})?;
for result in results {
result?;
} }
}
fn set_network_ureq_headers( step.store(total_documents, atomic::Ordering::Relaxed);
request: ureq::Request,
import_data: &ImportData,
origin: &Origin,
metadata: &ImportMetadata,
) -> ureq::Request {
let request = RequestWrapper(request);
let ImportMetadata { index_count, task_key, total_index_documents } = metadata;
let Origin { remote_name: origin_remote, task_uid, network_version } = origin;
let ImportData { remote_name: import_remote, index_name, document_count } = import_data;
let request = request
.set_origin_remote(origin_remote)
.set_origin_task_uid(*task_uid)
.set_origin_network_version(*network_version)
.set_import_remote(import_remote)
.set_import_docs(*document_count)
.set_import_index_count(*index_count)
.set_import_index_docs(*total_index_documents);
let request = if let Some(index_name) = index_name.as_deref() {
request.set_import_index(index_name)
} else {
request
};
let RequestWrapper(request) = if let Some(task_key) = task_key {
request.set_import_task_key(*task_key)
} else {
request
};
request
}
struct RequestWrapper(ureq::Request);
impl headers::SetHeader for RequestWrapper {
fn set_header(self, name: &str, value: &str) -> Self {
Self(self.0.set(name, value))
} }
}
#[allow(clippy::too_many_arguments)] Ok(output)
fn send_buffer<'a>(
buffer: &'a [u8],
mut compressed_buffer: &'a mut Vec<u8>,
must_stop_processing: &MustStopProcessing,
agent: &ureq::Agent,
documents_url: &'a str,
remote_name: Option<&str>,
bearer: Option<&'a str>,
task_network: Option<&(ImportData, Origin, ImportMetadata)>,
err: &'a impl Fn(milli::Error) -> crate::Error,
) -> Result<ControlFlow<(), ()>> {
// We compress the documents before sending them
let mut encoder: GzEncoder<&mut &mut Vec<u8>> =
GzEncoder::new(&mut compressed_buffer, Compression::default());
encoder.write_all(buffer).map_err(milli::Error::from).map_err(err)?;
encoder.finish().map_err(milli::Error::from).map_err(err)?;
let res = retry(must_stop_processing, || {
let mut request = agent.post(documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(bearer) = bearer {
request = request.set("Authorization", bearer);
}
if let Some((import_data, origin, metadata)) = task_network {
request = set_network_ureq_headers(request, import_data, origin, metadata);
}
request.send_bytes(compressed_buffer).map_err(into_backoff_error)
});
handle_response(remote_name, res)
}
fn handle_response(remote_name: Option<&str>, res: Result<Response>) -> Result<ControlFlow<()>> {
let remote_name = remote_name.unwrap_or("unnamed");
match res {
Ok(_response) => Ok(ControlFlow::Continue(())),
Err(Error::FromRemoteWhenExporting { code, .. })
if code == Code::ImportTaskAlreadyReceived.name() =>
{
Ok(ControlFlow::Continue(()))
}
Err(Error::FromRemoteWhenExporting { code, message, .. })
if code == Code::ImportTaskUnknownRemote.name() =>
{
tracing::warn!("remote `{remote_name}` answered with: {message}");
Ok(ControlFlow::Break(()))
}
// note: there have already been many attempts to get this due to exponential backoff
Err(Error::FromRemoteWhenExporting { code, message, .. })
if code == Code::ImportTaskWithoutNetworkTask.name() =>
{
tracing::warn!("remote `{remote_name}` answered with: {message}");
Ok(ControlFlow::Break(()))
}
Err(e) => {
tracing::warn!("error while exporting: {e}");
Err(e)
}
} }
} }
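Every outgoing request in this export path goes through a `retry(must_stop_processing, || ...)` wrapper built on the `backoff` crate's `ExponentialBackoff` (the helper itself lies outside the hunks shown here). A rough sketch of what such a helper can look like, with `should_stop` standing in for the must-stop flag; this illustrates the pattern only and is not the scheduler's actual implementation:

    use backoff::{backoff::Backoff, ExponentialBackoff};

    // Keep calling `f` until it succeeds, fails permanently, the backoff policy
    // gives up, or `should_stop` reports that processing must be aborted.
    fn retry_with_backoff<T, E>(
        should_stop: impl Fn() -> bool,
        mut f: impl FnMut() -> Result<T, backoff::Error<E>>,
    ) -> Result<T, E> {
        let mut policy = ExponentialBackoff::default();
        loop {
            match f() {
                Ok(value) => return Ok(value),
                Err(backoff::Error::Permanent(err)) => return Err(err),
                Err(backoff::Error::Transient { err, retry_after }) => {
                    if should_stop() {
                        return Err(err);
                    }
                    match retry_after.or_else(|| policy.next_backoff()) {
                        Some(wait) => std::thread::sleep(wait),
                        None => return Err(err), // policy exhausted its retries
                    }
                }
            }
        }
    }
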
@@ -593,65 +374,4 @@ fn ureq_error_into_error(error: ureq::Error) -> Error {
} }
} }
// export_one_index arguments
pub(super) struct TargetInstance<'a> {
pub(super) remote_name: Option<&'a str>,
pub(super) base_url: &'a str,
pub(super) api_key: Option<&'a str>,
}
pub(super) struct ExportOptions<'a> {
pub(super) index_uid: &'a str,
pub(super) payload_size: Option<&'a Byte>,
pub(super) override_settings: bool,
pub(super) export_mode: ExportMode<'a>,
}
impl ExportOptions<'_> {
fn task_network(
&self,
total_index_documents: u64,
) -> Option<(ImportData, Origin, ImportMetadata)> {
if let ExportMode::NetworkBalancing {
index_count,
export_old_remote_name,
network_change_origin,
} = self.export_mode
{
Some((
ImportData {
remote_name: export_old_remote_name.to_string(),
index_name: Some(self.index_uid.to_string()),
document_count: 0,
},
network_change_origin.clone(),
ImportMetadata { index_count, task_key: None, total_index_documents },
))
} else {
None
}
}
}
pub(super) struct ExportContext<'a> {
pub(super) index: &'a meilisearch_types::milli::Index,
pub(super) index_rtxn: &'a milli::heed::RoTxn<'a>,
pub(super) universe: &'a RoaringBitmap,
pub(super) progress: &'a Progress,
pub(super) agent: &'a ureq::Agent,
pub(super) must_stop_processing: &'a MustStopProcessing,
}
pub(super) enum ExportMode<'a> {
ExportRoute,
#[cfg_attr(not(feature = "enterprise"), allow(dead_code))]
NetworkBalancing {
index_count: u64,
export_old_remote_name: &'a str,
network_change_origin: &'a Origin,
},
}
// progress related
enum ExportIndex {} enum ExportIndex {}

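Both versions of the export loop above gzip each NDJSON chunk with flate2 and POST it through ureq with a `Content-Encoding: gzip` header. A minimal, self-contained sketch of that send step, with `url` and `bearer` as placeholder parameters and the retry wrapper and error mapping left out:

    use std::io::Write as _;
    use flate2::{write::GzEncoder, Compression};

    // Compress an already-serialized NDJSON payload and send it in one request.
    fn post_ndjson_gzip(
        agent: &ureq::Agent,
        url: &str,
        bearer: Option<&str>,
        ndjson: &[u8],
    ) -> Result<ureq::Response, ureq::Error> {
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(ndjson).expect("compressing into an in-memory buffer should not fail");
        let compressed = encoder.finish().expect("finishing an in-memory encoder should not fail");

        let mut request = agent
            .post(url)
            .set("Content-Type", "application/x-ndjson")
            .set("Content-Encoding", "gzip");
        if let Some(bearer) = bearer {
            request = request.set("Authorization", bearer);
        }
        request.send_bytes(&compressed)
    }
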
View File

@@ -8,7 +8,6 @@ use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
use meilisearch_types::milli::update::DocumentAdditionResult; use meilisearch_types::milli::update::DocumentAdditionResult;
use meilisearch_types::milli::{self, ChannelCongestion, Filter}; use meilisearch_types::milli::{self, ChannelCongestion, Filter};
use meilisearch_types::network::Network;
use meilisearch_types::settings::apply_settings_to_builder; use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::Index; use meilisearch_types::Index;
@@ -37,7 +36,6 @@ impl IndexScheduler {
operation: IndexOperation, operation: IndexOperation,
progress: &Progress, progress: &Progress,
embedder_stats: Arc<EmbedderStats>, embedder_stats: Arc<EmbedderStats>,
network: &Network,
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> { ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let started_processing_at = std::time::Instant::now(); let started_processing_at = std::time::Instant::now();
@@ -69,6 +67,8 @@ impl IndexScheduler {
IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => { IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
progress.update_progress(DocumentOperationProgress::RetrievingConfig); progress.update_progress(DocumentOperationProgress::RetrievingConfig);
let network = self.network();
let shards = network.shards(); let shards = network.shards();
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
@@ -77,8 +77,8 @@ impl IndexScheduler {
let mut content_files = Vec::new(); let mut content_files = Vec::new();
for operation in &operations { for operation in &operations {
match operation { match operation {
DocumentOperation::Replace { content_file: content_uuid, .. } DocumentOperation::Replace(content_uuid)
| DocumentOperation::Update { content_file: content_uuid, .. } => { | DocumentOperation::Update(content_uuid) => {
let content_file = self.queue.file_store.get_update(*content_uuid)?; let content_file = self.queue.file_store.get_update(*content_uuid)?;
let mmap = unsafe { memmap2::Mmap::map(&content_file)? }; let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
content_files.push(mmap); content_files.push(mmap);
@@ -100,16 +100,16 @@ impl IndexScheduler {
let embedders = self.embedders(index_uid.clone(), embedders)?; let embedders = self.embedders(index_uid.clone(), embedders)?;
for operation in operations { for operation in operations {
match operation { match operation {
DocumentOperation::Replace { content_file: _, on_missing_document } => { DocumentOperation::Replace(_content_uuid) => {
let mmap = content_files_iter.next().unwrap(); let mmap = content_files_iter.next().unwrap();
indexer indexer
.replace_documents(mmap, on_missing_document) .replace_documents(mmap)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
} }
DocumentOperation::Update { content_file: _, on_missing_document } => { DocumentOperation::Update(_content_uuid) => {
let mmap = content_files_iter.next().unwrap(); let mmap = content_files_iter.next().unwrap();
indexer indexer
.update_documents(mmap, on_missing_document) .update_documents(mmap)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
} }
DocumentOperation::Delete(document_ids) => { DocumentOperation::Delete(document_ids) => {
@@ -504,7 +504,6 @@ impl IndexScheduler {
}, },
progress, progress,
embedder_stats.clone(), embedder_stats.clone(),
network,
)?; )?;
let (settings_tasks, _congestion) = self.apply_index_operation( let (settings_tasks, _congestion) = self.apply_index_operation(
@@ -513,7 +512,6 @@ impl IndexScheduler {
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
progress, progress,
embedder_stats, embedder_stats,
network,
)?; )?;
let mut tasks = settings_tasks; let mut tasks = settings_tasks;

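Each document operation above memory-maps its update file (`unsafe { memmap2::Mmap::map(&content_file)? }`) before handing the bytes to the indexer, so large payloads are never copied into heap buffers first. A stripped-down sketch of that step, with an explicit path argument in place of the queue's file store (illustrative only):

    use std::{fs::File, path::Path};
    use memmap2::Mmap;

    // Map an update file read-only so the indexer can parse it without copying it.
    fn map_update_file(path: &Path) -> std::io::Result<Mmap> {
        let file = File::open(path)?;
        // Safety: callers must guarantee the file is not truncated or mutated
        // while the mapping is alive, which the scheduler relies on as well.
        unsafe { Mmap::map(&file) }
    }
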
View File

@@ -12,8 +12,6 @@ use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
use crate::queue::TaskQueue; use crate::queue::TaskQueue;
use crate::{Error, IndexScheduler, Result}; use crate::{Error, IndexScheduler, Result};
pub(crate) const UPDATE_FILES_DIR_NAME: &str = "update_files";
/// # Safety /// # Safety
/// ///
/// See [`EnvOpenOptions::open`]. /// See [`EnvOpenOptions::open`].
@@ -80,32 +78,10 @@ impl IndexScheduler {
pub(super) fn process_snapshot( pub(super) fn process_snapshot(
&self, &self,
progress: Progress, progress: Progress,
tasks: Vec<Task>, mut tasks: Vec<Task>,
) -> Result<Vec<Task>> { ) -> Result<Vec<Task>> {
progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation); progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
match self.scheduler.s3_snapshot_options.clone() {
Some(options) => {
#[cfg(not(unix))]
{
let _ = options;
panic!("Non-unix platform does not support S3 snapshotting");
}
#[cfg(unix)]
self.runtime
.as_ref()
.expect("Runtime not initialized")
.block_on(self.process_snapshot_to_s3(progress, options, tasks))
}
None => self.process_snapshots_to_disk(progress, tasks),
}
}
fn process_snapshots_to_disk(
&self,
progress: Progress,
mut tasks: Vec<Task>,
) -> Result<Vec<Task>, Error> {
fs::create_dir_all(&self.scheduler.snapshots_path)?; fs::create_dir_all(&self.scheduler.snapshots_path)?;
let temp_snapshot_dir = tempfile::tempdir()?; let temp_snapshot_dir = tempfile::tempdir()?;
@@ -152,7 +128,7 @@ impl IndexScheduler {
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
// 2.4 Create the update files directory // 2.4 Create the update files directory
let update_files_dir = temp_snapshot_dir.path().join(UPDATE_FILES_DIR_NAME); let update_files_dir = temp_snapshot_dir.path().join("update_files");
fs::create_dir_all(&update_files_dir)?; fs::create_dir_all(&update_files_dir)?;
// 2.5 Only copy the update files of the enqueued tasks // 2.5 Only copy the update files of the enqueued tasks
@@ -164,7 +140,7 @@ impl IndexScheduler {
let task = let task =
self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
if let Some(content_uuid) = task.content_uuid() { if let Some(content_uuid) = task.content_uuid() {
let src = self.queue.file_store.update_path(content_uuid); let src = self.queue.file_store.get_update_path(content_uuid);
let dst = update_files_dir.join(content_uuid.to_string()); let dst = update_files_dir.join(content_uuid.to_string());
fs::copy(src, dst)?; fs::copy(src, dst)?;
} }

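The disk snapshot path above creates an `update_files` directory inside the temporary snapshot and copies only the content files of still-enqueued tasks into it. A simplified sketch of that copy step, assuming content files are stored under their UUID as the file name (the parameters here are illustrative, not the queue's file-store API):

    use std::{fs, io, path::Path};
    use uuid::Uuid;

    // Copy the update files that enqueued tasks still reference into the snapshot.
    fn copy_enqueued_update_files(
        update_files_src: &Path,
        snapshot_dir: &Path,
        content_uuids: &[Uuid],
    ) -> io::Result<()> {
        let update_files_dir = snapshot_dir.join("update_files");
        fs::create_dir_all(&update_files_dir)?;
        for uuid in content_uuids {
            let file_name = uuid.to_string();
            fs::copy(update_files_src.join(&file_name), update_files_dir.join(&file_name))?;
        }
        Ok(())
    }
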
View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 0 {uid: 0, batch_uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, batch_uid: 0, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} 1 {uid: 1, batch_uid: 0, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:

View File

@@ -1,12 +1,13 @@
--- ---
source: crates/index-scheduler/src/scheduler/test.rs source: crates/index-scheduler/src/scheduler/test.rs
snapshot_kind: text
--- ---
### Autobatching Enabled = true ### Autobatching Enabled = true
### Processing batch None: ### Processing batch None:
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} 1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:

View File

@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, stop reason: "batched all enqueued tasks for index `beavero`", } {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, stop reason: "batched all enqueued tasks for index `beavero`", }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }} 3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:

View File

@@ -6,9 +6,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 1 {uid: 1, batch_uid: 1, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, batch_uid: 1, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 2 {uid: 2, batch_uid: 1, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
3 {uid: 3, batch_uid: 1, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(2), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }} 3 {uid: 3, batch_uid: 1, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(2), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:

View File

@@ -6,9 +6,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
enqueued [1,2,] enqueued [1,2,]

View File

@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, stop reason: "batched all enqueued tasks for index `beavero`", } {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, stop reason: "batched all enqueued tasks for index `beavero`", }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }} 3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:

View File

@@ -7,7 +7,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, stop reason: "batched all enqueued tasks", } {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, stop reason: "batched all enqueued tasks", }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} 1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }} 0 {uid: 0, batch_uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, batch_uid: 0, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} 1 {uid: 1, batch_uid: 0, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:

View File

@@ -7,7 +7,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, stop reason: "batched all enqueued tasks", }
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:

View File

@@ -7,7 +7,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, stop reason: "batched all enqueued tasks", }
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -1,12 +1,13 @@
---
source: crates/index-scheduler/src/scheduler/test.rs
+snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -1,12 +1,13 @@
---
source: crates/index-scheduler/src/scheduler/test.rs
+snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -7,7 +7,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: None, old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }}
-1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: IndexDeletion { index_uid: "doggos" }}
----------------------------------------------------------------------
### Status:

View File

@@ -7,7 +7,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: None, old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }}
-1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, batch_uid: 1, status: succeeded, details: { deleted_documents: Some(0) }, kind: IndexDeletion { index_uid: "doggos" }}
----------------------------------------------------------------------
### Status:

View File

@@ -7,7 +7,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: None, old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }}
-1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]

View File

@@ -7,7 +7,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: None, old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }}
-1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: IndexDeletion { index_uid: "doggos" }}
----------------------------------------------------------------------
### Status:

View File

@@ -1,12 +1,13 @@
---
source: crates/index-scheduler/src/scheduler/test.rs
+snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { deleted_documents: None }, kind: IndexDeletion { index_uid: "doggos" }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, batch_uid: 0, status: succeeded, details: { deleted_documents: Some(0) }, kind: IndexDeletion { index_uid: "doggos" }}
----------------------------------------------------------------------
### Status:

View File

@@ -1,13 +1,14 @@
---
source: crates/index-scheduler/src/scheduler/test.rs
+snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
-1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
+1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]

View File

@@ -6,8 +6,8 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
-1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
+1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
-1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, batch_uid: 1, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
3 {uid: 3, batch_uid: 1, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------

View File

@@ -6,8 +6,8 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
-1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
+1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { matched_tasks: 1, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:

View File

@@ -1,13 +1,14 @@
---
source: crates/index-scheduler/src/scheduler/test.rs
+snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
-1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true, on_missing_document: Create }}
+0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
+1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]

Some files were not shown because too many files have changed in this diff.