mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-22 20:56:04 +00:00
Compare commits
2 Commits
reduce-pre
...
incrementa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2c12576b20 | ||
|
|
8006016a43 |
10
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
10
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
@@ -22,16 +22,6 @@ Related product discussion:
|
||||
|
||||
<!---If necessary, create a list with technical/product steps-->
|
||||
|
||||
### Reminders when modifying the API
|
||||
|
||||
- [ ] Update the openAPI file with utoipa:
|
||||
- [ ] If a new module has been introduced, create a new structure deriving [the OpenAPI proc-macro](https://docs.rs/utoipa/latest/utoipa/derive.OpenApi.html) and nest it in the main [openAPI structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L64-L78).
|
||||
- [ ] If a new route has been introduced, add the [path decorator](https://docs.rs/utoipa/latest/utoipa/attr.path.html) to it and add the route at the top of the file in its openAPI structure.
|
||||
- [ ] If a structure which is deserialized or serialized in the API has been introduced or modified, it must derive the [`schema`](https://docs.rs/utoipa/latest/utoipa/macro.schema.html) or the [`IntoParams`](https://docs.rs/utoipa/latest/utoipa/derive.IntoParams.html) proc-macro.
|
||||
If it's a **new** structure you must also add it to the big list of structures [in the main `OpenApi` structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L88).
|
||||
- [ ] Once everything is done, start Meilisearch with the swagger flag: `cargo run --features swagger`, open `http://localhost:7700/scalar` on your browser, and ensure everything works as expected.
|
||||
- For more info, refer to [this presentation](https://pitch.com/v/generating-the-openapi-file-jrn3nh).
|
||||
|
||||
### Reminders when modifying the Setting API
|
||||
|
||||
<!--- Special steps to remind when adding a new index setting -->
|
||||
|
||||
2
.github/workflows/bench-manual.yml
vendored
2
.github/workflows/bench-manual.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
4
.github/workflows/bench-pr.yml
vendored
4
.github/workflows/bench-pr.yml
vendored
@@ -55,7 +55,7 @@ jobs:
|
||||
reaction-type: "rocket"
|
||||
repo-token: ${{ env.GH_TOKEN }}
|
||||
|
||||
- uses: xt0rted/pull-request-comment-branch@v3
|
||||
- uses: xt0rted/pull-request-comment-branch@v2
|
||||
id: comment-branch
|
||||
with:
|
||||
repo_token: ${{ env.GH_TOKEN }}
|
||||
@@ -66,7 +66,7 @@ jobs:
|
||||
fetch-depth: 0 # fetch full history to be able to get main commit sha
|
||||
ref: ${{ steps.comment-branch.outputs.head_ref }}
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/bench-push-indexing.yml
vendored
2
.github/workflows/bench-push-indexing.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/benchmarks-manual.yml
vendored
2
.github/workflows/benchmarks-manual.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
4
.github/workflows/benchmarks-pr.yml
vendored
4
.github/workflows/benchmarks-pr.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
reaction-type: "eyes"
|
||||
repo-token: ${{ env.GH_TOKEN }}
|
||||
|
||||
- uses: xt0rted/pull-request-comment-branch@v3
|
||||
- uses: xt0rted/pull-request-comment-branch@v2
|
||||
id: comment-branch
|
||||
with:
|
||||
repo_token: ${{ env.GH_TOKEN }}
|
||||
|
||||
@@ -16,7 +16,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
100
.github/workflows/check-valid-milestone.yml
vendored
100
.github/workflows/check-valid-milestone.yml
vendored
@@ -1,100 +0,0 @@
|
||||
name: PR Milestone Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, reopened, edited, synchronize, milestoned, demilestoned]
|
||||
branches:
|
||||
- "main"
|
||||
- "release-v*.*.*"
|
||||
|
||||
jobs:
|
||||
check-milestone:
|
||||
name: Check PR Milestone
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Validate PR milestone
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
// Get PR number directly from the event payload
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
|
||||
// Get PR details
|
||||
const { data: prData } = await github.rest.pulls.get({
|
||||
owner: 'meilisearch',
|
||||
repo: 'meilisearch',
|
||||
pull_number: prNumber
|
||||
});
|
||||
|
||||
// Get base branch name
|
||||
const baseBranch = prData.base.ref;
|
||||
console.log(`Base branch: ${baseBranch}`);
|
||||
|
||||
// Get PR milestone
|
||||
const prMilestone = prData.milestone;
|
||||
if (!prMilestone) {
|
||||
core.setFailed('PR must have a milestone assigned');
|
||||
return;
|
||||
}
|
||||
console.log(`PR milestone: ${prMilestone.title}`);
|
||||
|
||||
// Validate milestone format: vx.y.z
|
||||
const milestoneRegex = /^v\d+\.\d+\.\d+$/;
|
||||
if (!milestoneRegex.test(prMilestone.title)) {
|
||||
core.setFailed(`Milestone "${prMilestone.title}" does not follow the required format vx.y.z`);
|
||||
return;
|
||||
}
|
||||
|
||||
// For main branch PRs, check if the milestone is the highest one
|
||||
if (baseBranch === 'main') {
|
||||
// Get all milestones
|
||||
const { data: milestones } = await github.rest.issues.listMilestones({
|
||||
owner: 'meilisearch',
|
||||
repo: 'meilisearch',
|
||||
state: 'open',
|
||||
sort: 'due_on',
|
||||
direction: 'desc'
|
||||
});
|
||||
|
||||
// Sort milestones by version number (vx.y.z)
|
||||
const sortedMilestones = milestones
|
||||
.filter(m => milestoneRegex.test(m.title))
|
||||
.sort((a, b) => {
|
||||
const versionA = a.title.substring(1).split('.').map(Number);
|
||||
const versionB = b.title.substring(1).split('.').map(Number);
|
||||
|
||||
// Compare major version
|
||||
if (versionA[0] !== versionB[0]) return versionB[0] - versionA[0];
|
||||
// Compare minor version
|
||||
if (versionA[1] !== versionB[1]) return versionB[1] - versionA[1];
|
||||
// Compare patch version
|
||||
return versionB[2] - versionA[2];
|
||||
});
|
||||
|
||||
if (sortedMilestones.length === 0) {
|
||||
core.setFailed('No valid milestones found in the repository. Please create at least one milestone with the format vx.y.z');
|
||||
return;
|
||||
}
|
||||
|
||||
const highestMilestone = sortedMilestones[0];
|
||||
console.log(`Highest milestone: ${highestMilestone.title}`);
|
||||
|
||||
if (prMilestone.title !== highestMilestone.title) {
|
||||
core.setFailed(`PRs targeting the main branch must use the highest milestone (${highestMilestone.title}), but this PR uses ${prMilestone.title}`);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// For release branches, the milestone should match the branch version
|
||||
const branchVersion = baseBranch.substring(8); // remove 'release-'
|
||||
if (prMilestone.title !== branchVersion) {
|
||||
core.setFailed(`PRs targeting release branch "${baseBranch}" must use the matching milestone "${branchVersion}", but this PR uses "${prMilestone.title}"`);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
console.log('PR milestone validation passed!');
|
||||
36
.github/workflows/flaky-tests.yml
vendored
36
.github/workflows/flaky-tests.yml
vendored
@@ -9,22 +9,22 @@ jobs:
|
||||
flaky:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Install cargo-flaky
|
||||
run: cargo install cargo-flaky
|
||||
- name: Run cargo flaky in the dumps
|
||||
run: cd crates/dump; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in the index-scheduler
|
||||
run: cd crates/index-scheduler; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in the auth
|
||||
run: cd crates/meilisearch-auth; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in meilisearch
|
||||
run: cd crates/meilisearch; cargo flaky -i 100 --release
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Install cargo-flaky
|
||||
run: cargo install cargo-flaky
|
||||
- name: Run cargo flaky in the dumps
|
||||
run: cd crates/dump; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in the index-scheduler
|
||||
run: cd crates/index-scheduler; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in the auth
|
||||
run: cd crates/meilisearch-auth; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in meilisearch
|
||||
run: cd crates/meilisearch; cargo flaky -i 100 --release
|
||||
|
||||
2
.github/workflows/fuzzer-indexing.yml
vendored
2
.github/workflows/fuzzer-indexing.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
42
.github/workflows/publish-apt-brew-pkg.yml
vendored
42
.github/workflows/publish-apt-brew-pkg.yml
vendored
@@ -18,28 +18,28 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
steps:
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Install cargo-deb
|
||||
run: cargo install cargo-deb
|
||||
- uses: actions/checkout@v3
|
||||
- name: Build deb package
|
||||
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
|
||||
- name: Upload debian pkg to release
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/debian/meilisearch.deb
|
||||
asset_name: meilisearch.deb
|
||||
tag: ${{ github.ref }}
|
||||
- name: Upload debian pkg to apt repository
|
||||
run: curl -F package=@target/debian/meilisearch.deb https://${{ secrets.GEMFURY_PUSH_TOKEN }}@push.fury.io/meilisearch/
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Install cargo-deb
|
||||
run: cargo install cargo-deb
|
||||
- uses: actions/checkout@v3
|
||||
- name: Build deb package
|
||||
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
|
||||
- name: Upload debian pkg to release
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/debian/meilisearch.deb
|
||||
asset_name: meilisearch.deb
|
||||
tag: ${{ github.ref }}
|
||||
- name: Upload debian pkg to apt repository
|
||||
run: curl -F package=@target/debian/meilisearch.deb https://${{ secrets.GEMFURY_PUSH_TOKEN }}@push.fury.io/meilisearch/
|
||||
|
||||
homebrew:
|
||||
name: Bump Homebrew formula
|
||||
|
||||
74
.github/workflows/publish-binaries.yml
vendored
74
.github/workflows/publish-binaries.yml
vendored
@@ -3,7 +3,7 @@ name: Publish binaries to GitHub release
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "0 2 * * *" # Every day at 2:00am
|
||||
- cron: '0 2 * * *' # Every day at 2:00am
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
@@ -37,26 +37,26 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
asset_name: meilisearch-linux-amd64
|
||||
tag: ${{ github.ref }}
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
asset_name: meilisearch-linux-amd64
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-macos-windows:
|
||||
name: Publish binary for ${{ matrix.os }}
|
||||
@@ -74,19 +74,19 @@ jobs:
|
||||
artifact_name: meilisearch.exe
|
||||
asset_name: meilisearch-windows-amd64.exe
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-macos-apple-silicon:
|
||||
name: Publish binary for macOS silicon
|
||||
@@ -101,7 +101,7 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.81
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
@@ -127,8 +127,8 @@ jobs:
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
@@ -148,7 +148,7 @@ jobs:
|
||||
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
|
||||
apt-get update -y && apt-get install -y docker-ce
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.81
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
|
||||
2
.github/workflows/sdks-tests.yml
vendored
2
.github/workflows/sdks-tests.yml
vendored
@@ -52,7 +52,7 @@ jobs:
|
||||
- name: Setup .NET Core
|
||||
uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: "8.0.x"
|
||||
dotnet-version: "6.0.x"
|
||||
- name: Install dependencies
|
||||
run: dotnet restore
|
||||
- name: Build
|
||||
|
||||
86
.github/workflows/test-suite.yml
vendored
86
.github/workflows/test-suite.yml
vendored
@@ -4,9 +4,13 @@ on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Everyday at 5:00am
|
||||
- cron: "0 5 * * *"
|
||||
- cron: '0 5 * * *'
|
||||
pull_request:
|
||||
merge_group:
|
||||
push:
|
||||
# trying and staging branches are for Bors config
|
||||
branches:
|
||||
- trying
|
||||
- staging
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -15,21 +19,21 @@ env:
|
||||
|
||||
jobs:
|
||||
test-linux:
|
||||
name: Tests on ubuntu-22.04
|
||||
name: Tests on ubuntu-20.04
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Setup test with Rust stable
|
||||
uses: dtolnay/rust-toolchain@1.81
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -51,8 +55,8 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -68,8 +72,8 @@ jobs:
|
||||
name: Tests almost all features
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
@@ -77,51 +81,19 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Run cargo build with almost all features
|
||||
run: |
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
- name: Run cargo test with almost all features
|
||||
run: |
|
||||
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
|
||||
ollama-ubuntu:
|
||||
name: Test with Ollama
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Install Ollama
|
||||
run: |
|
||||
curl -fsSL https://ollama.com/install.sh | sudo -E sh
|
||||
- name: Start serving
|
||||
run: |
|
||||
# Run it in the background, there is no way to daemonise at the moment
|
||||
ollama serve &
|
||||
|
||||
# A short pause is required before the HTTP port is opened
|
||||
sleep 5
|
||||
|
||||
# This endpoint blocks until ready
|
||||
time curl -i http://localhost:11434
|
||||
|
||||
- name: Pull nomic-embed-text & all-minilm
|
||||
run: |
|
||||
ollama pull nomic-embed-text
|
||||
ollama pull all-minilm
|
||||
|
||||
- name: Run cargo test
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all --features test-ollama ollama
|
||||
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
|
||||
test-disabled-tokenization:
|
||||
name: Test disabled tokenization
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ubuntu:22.04
|
||||
image: ubuntu:20.04
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
@@ -129,7 +101,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Run cargo tree without default features and check lindera is not present
|
||||
run: |
|
||||
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
|
||||
@@ -145,17 +117,17 @@ jobs:
|
||||
name: Run tests in debug
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -167,12 +139,12 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -184,14 +156,14 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: nightly-2024-07-09
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
||||
@@ -18,7 +18,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
with:
|
||||
profile: minimal
|
||||
- name: Install sd
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,7 +10,6 @@
|
||||
/dumps
|
||||
/bench
|
||||
/_xtask_benchmark.ms
|
||||
/benchmarks
|
||||
|
||||
# Snapshots
|
||||
## ... large
|
||||
|
||||
@@ -48,27 +48,6 @@ cargo xtask bench --no-dashboard -- workloads/my_workload_1.json workloads/my_wo
|
||||
|
||||
For processing the results, look at [Looking at benchmark results/Without dashboard](#without-dashboard).
|
||||
|
||||
#### Sending a workload by hand
|
||||
|
||||
Sometimes you want to visualize the metrics of a worlkoad that comes from a custom report.
|
||||
It is not quite easy to trick the benchboard in thinking that your report is legitimate but here are the commands you can run to upload your firefox report on a running benchboard.
|
||||
|
||||
```bash
|
||||
# Name this hostname whatever you want
|
||||
echo '{ "hostname": "the-best-place" }' | xh PUT 'http://127.0.0.1:9001/api/v1/machine'
|
||||
|
||||
# You'll receive an UUID from this command that we will call $invocation_uuid
|
||||
echo '{ "commit": { "sha1": "1234567", "commit_date": "2024-09-05 12:00:12.0 +00:00:00", "message": "A cool message" }, "machine_hostname": "the-best-place", "max_workloads": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/invocation'
|
||||
|
||||
# Just use UUID from the previous command
|
||||
# and you'll receive another UUID that we will call $workload_uuid
|
||||
echo '{ "invocation_uuid": "$invocation_uuid", "name": "toto", "max_runs": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/workload'
|
||||
|
||||
# And now use your $workload_uuid and the content of your firefox report
|
||||
# but don't forget to convert your firefox report from JSONLines into an object
|
||||
echo '{ "workload_uuid": "$workload_uuid", "data": $REPORT_JSON_DATA }' | xh PUT 'http://127.0.0.1:9001/api/v1/run'
|
||||
```
|
||||
|
||||
### In CI
|
||||
|
||||
We have dedicated runners to run workloads on CI. Currently, there are three ways of running the CI:
|
||||
|
||||
@@ -95,11 +95,6 @@ Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) wo
|
||||
|
||||
Run `cargo xtask --help` from the root of the repository to find out what is available.
|
||||
|
||||
#### Update the openAPI file if the API changed
|
||||
|
||||
To update the openAPI file in the code, see [sprint_issue.md](https://github.com/meilisearch/meilisearch/blob/main/.github/ISSUE_TEMPLATE/sprint_issue.md#reminders-when-modifying-the-api).
|
||||
If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api), see [update-openapi-issue.md](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-openapi-issue.md).
|
||||
|
||||
### Logging
|
||||
|
||||
Meilisearch uses [`tracing`](https://lib.rs/crates/tracing) for logging purposes. Tracing logs are structured and can be displayed as JSON to the end user, so prefer passing arguments as fields rather than interpolating them in the message.
|
||||
@@ -150,7 +145,7 @@ Some notes on GitHub PRs:
|
||||
- The PR title should be accurate and descriptive of the changes.
|
||||
- [Convert your PR as a draft](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request) if your changes are a work in progress: no one will review it until you pass your PR as ready for review.<br>
|
||||
The draft PRs are recommended when you want to show that you are working on something and make your work visible.
|
||||
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.
|
||||
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [Bors](https://github.com/bors-ng/bors-ng) to automatically enforce this requirement without the PR author having to rebase manually.
|
||||
|
||||
## Release Process (for internal team only)
|
||||
|
||||
@@ -158,7 +153,8 @@ Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org
|
||||
|
||||
### Automation to rebase and Merge the PRs
|
||||
|
||||
This project uses GitHub Merge Queues that helps us manage pull requests merging.
|
||||
This project integrates a bot that helps us manage pull requests merging.<br>
|
||||
_[Read more about this](https://github.com/meilisearch/integration-guides/blob/main/resources/bors.md)._
|
||||
|
||||
### How to Publish a new Release
|
||||
|
||||
|
||||
1511
Cargo.lock
generated
1511
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -22,7 +22,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.14.0"
|
||||
version = "1.12.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
@@ -36,12 +36,6 @@ license = "MIT"
|
||||
[profile.release]
|
||||
codegen-units = 1
|
||||
|
||||
# We now compile heed without the NDEBUG define for better performance.
|
||||
# However, we still enable debug assertions for a better detection of
|
||||
# disk corruption on the cloud or in OSS.
|
||||
[profile.release.package.heed]
|
||||
debug-assertions = true
|
||||
|
||||
[profile.dev.package.flate2]
|
||||
opt-level = 3
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Compile
|
||||
FROM rust:1.81.0-alpine3.20 AS compiler
|
||||
FROM rust:1.79.0-alpine3.20 AS compiler
|
||||
|
||||
RUN apk add -q --no-cache build-base openssl-dev
|
||||
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019-2025 Meili SAS
|
||||
Copyright (c) 2019-2024 Meili SAS
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
<p align="center">
|
||||
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
|
||||
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
|
||||
<a href="https://github.com/meilisearch/meilisearch/queue"><img alt="Merge Queues enabled" src="https://img.shields.io/badge/Merge_Queues-enabled-%2357cf60?logo=github"></a>
|
||||
<a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
|
||||
</p>
|
||||
|
||||
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
|
||||
@@ -58,7 +58,6 @@ See the list of all our example apps in our [demos repository](https://github.co
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
|
||||
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **AI-ready:** works out of the box with [langchain](https://www.meilisearch.com/with/langchain) and the [model context protocol](https://github.com/meilisearch/meilisearch-mcp)
|
||||
- **Easy to install, deploy, and maintain**
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
@@ -1403,104 +1403,6 @@
|
||||
"title": "Number of tasks by indexes",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 15,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 51
|
||||
},
|
||||
"id": 29,
|
||||
"interval": "5s",
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.1.4",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "meilisearch_task_queue_latency_seconds{instance=\"$instance\", job=\"$job\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "{{value}} ",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Task queue latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"datasource": {
|
||||
|
||||
11
bors.toml
Normal file
11
bors.toml
Normal file
@@ -0,0 +1,11 @@
|
||||
status = [
|
||||
'Tests on ubuntu-20.04',
|
||||
'Tests on macos-13',
|
||||
'Tests on windows-2022',
|
||||
'Run Clippy',
|
||||
'Run Rustfmt',
|
||||
'Run tests in debug',
|
||||
]
|
||||
pr_status = ['Milestone Check']
|
||||
# 3 hours timeout
|
||||
timeout-sec = 10800
|
||||
@@ -11,27 +11,27 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
anyhow = "1.0.86"
|
||||
bumpalo = "3.16.0"
|
||||
csv = "1.3.1"
|
||||
csv = "1.3.0"
|
||||
memmap2 = "0.9.5"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.43", default-features = false }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tempfile = "3.14.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.10.10"
|
||||
roaring = "0.10.7"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.95"
|
||||
bytes = "1.9.0"
|
||||
anyhow = "1.0.86"
|
||||
bytes = "1.6.0"
|
||||
convert_case = "0.6.0"
|
||||
flate2 = "1.0.35"
|
||||
reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
flate2 = "1.0.30"
|
||||
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/all-tokenizations"]
|
||||
|
||||
@@ -10,9 +10,9 @@ use milli::documents::PrimaryKey;
|
||||
use milli::heed::{EnvOpenOptions, RwTxn};
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexerConfig, Settings};
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::{FilterableAttributesRule, Index};
|
||||
use milli::Index;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand_chacha::rand_core::SeedableRng;
|
||||
use roaring::RoaringBitmap;
|
||||
@@ -35,11 +35,10 @@ fn setup_dir(path: impl AsRef<Path>) {
|
||||
fn setup_index() -> Index {
|
||||
let path = "benches.mmdb";
|
||||
setup_dir(path);
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(100);
|
||||
Index::new(options, path, true).unwrap()
|
||||
Index::new(options, path).unwrap()
|
||||
}
|
||||
|
||||
fn setup_settings<'t>(
|
||||
@@ -58,8 +57,7 @@ fn setup_settings<'t>(
|
||||
let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let filterable_fields =
|
||||
filterable_fields.iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect();
|
||||
let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_filterable_fields(filterable_fields);
|
||||
|
||||
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
|
||||
@@ -140,9 +138,10 @@ fn indexing_songs_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -206,9 +205,10 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -250,9 +250,10 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -318,9 +319,10 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -394,9 +396,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -438,9 +441,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -478,9 +482,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -544,10 +549,11 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -611,9 +617,10 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -677,9 +684,10 @@ fn indexing_wiki(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -742,9 +750,10 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -786,9 +795,10 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -853,9 +863,10 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -928,10 +939,11 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents =
|
||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -973,10 +985,11 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents =
|
||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1014,10 +1027,11 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents =
|
||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1081,9 +1095,10 @@ fn indexing_movies_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1146,9 +1161,10 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1190,9 +1206,10 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1257,9 +1274,10 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1369,9 +1387,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1413,9 +1432,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1453,9 +1473,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1542,9 +1563,10 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1632,9 +1654,10 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1714,9 +1737,10 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1780,9 +1804,10 @@ fn indexing_geo(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1845,9 +1870,10 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1889,9 +1915,10 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -1956,9 +1983,10 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
|
||||
@@ -2,7 +2,7 @@ mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::{update::Settings, FilterableAttributesRule};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
#[cfg(not(windows))]
|
||||
@@ -21,10 +21,8 @@ fn base_conf(builder: &mut Settings) {
|
||||
["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let filterable_fields = ["_geo", "population", "elevation"]
|
||||
.iter()
|
||||
.map(|s| FilterableAttributesRule::Field(s.to_string()))
|
||||
.collect();
|
||||
let filterable_fields =
|
||||
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_filterable_fields(filterable_fields);
|
||||
|
||||
let sortable_fields =
|
||||
|
||||
@@ -2,7 +2,7 @@ mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::{update::Settings, FilterableAttributesRule};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
#[cfg(not(windows))]
|
||||
@@ -22,7 +22,7 @@ fn base_conf(builder: &mut Settings) {
|
||||
|
||||
let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"]
|
||||
.iter()
|
||||
.map(|s| FilterableAttributesRule::Field(s.to_string()))
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_filterable_fields(faceted_fields);
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ use memmap2::Mmap;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexerConfig, Settings};
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
|
||||
use serde_json::Value;
|
||||
@@ -65,11 +65,10 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
}
|
||||
create_dir_all(conf.database_name).unwrap();
|
||||
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(100);
|
||||
let index = Index::new(options, conf.database_name, true).unwrap();
|
||||
let index = Index::new(options, conf.database_name).unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
@@ -100,8 +99,8 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let documents = documents_from(conf.dataset, conf.dataset_format);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
|
||||
@@ -11,8 +11,8 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
time = { version = "0.3.37", features = ["parsing"] }
|
||||
time = { version = "0.3.36", features = ["parsing"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.95"
|
||||
vergen-git2 = "1.0.2"
|
||||
anyhow = "1.0.86"
|
||||
vergen-git2 = "1.0.0"
|
||||
|
||||
@@ -11,21 +11,21 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
flate2 = "1.0.35"
|
||||
http = "1.2.0"
|
||||
anyhow = "1.0.86"
|
||||
flate2 = "1.0.30"
|
||||
http = "1.1.0"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.20.2"
|
||||
regex = "1.11.1"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
once_cell = "1.19.0"
|
||||
regex = "1.10.5"
|
||||
roaring = { version = "0.10.7", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tar = "0.4.41"
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -10,10 +10,8 @@ dump
|
||||
├── instance-uid.uuid
|
||||
├── keys.jsonl
|
||||
├── metadata.json
|
||||
├── tasks
|
||||
│ ├── update_files
|
||||
│ │ └── [task_id].jsonl
|
||||
│ └── queue.jsonl
|
||||
└── batches
|
||||
└── tasks
|
||||
├── update_files
|
||||
│ └── [task_id].jsonl
|
||||
└── queue.jsonl
|
||||
```
|
||||
```
|
||||
@@ -141,9 +141,6 @@ pub enum KindDump {
|
||||
instance_uid: Option<InstanceUid>,
|
||||
},
|
||||
SnapshotCreation,
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
}
|
||||
|
||||
impl From<Task> for TaskDump {
|
||||
@@ -213,9 +210,6 @@ impl From<KindWithContent> for KindDump {
|
||||
KindDump::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
|
||||
KindWithContent::UpgradeDatabase { from: version } => {
|
||||
KindDump::UpgradeDatabase { from: version }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -228,16 +222,14 @@ pub(crate) mod test {
|
||||
|
||||
use big_s::S;
|
||||
use maplit::{btreemap, btreeset};
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::{Action, Key};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::{self, FilterableAttributesRule};
|
||||
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
|
||||
use meilisearch_types::task_view::DetailsView;
|
||||
use meilisearch_types::tasks::{Details, Kind, Status};
|
||||
use meilisearch_types::tasks::{Details, Status};
|
||||
use serde_json::{json, Map, Value};
|
||||
use time::macros::datetime;
|
||||
use uuid::Uuid;
|
||||
@@ -279,10 +271,7 @@ pub(crate) mod test {
|
||||
let settings = Settings {
|
||||
displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
|
||||
searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
|
||||
filterable_attributes: Setting::Set(vec![
|
||||
FilterableAttributesRule::Field(S("race")),
|
||||
FilterableAttributesRule::Field(S("age")),
|
||||
]),
|
||||
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
|
||||
sortable_attributes: Setting::Set(btreeset! { S("age") }),
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
@@ -310,33 +299,6 @@ pub(crate) mod test {
|
||||
settings.check()
|
||||
}
|
||||
|
||||
pub fn create_test_batches() -> Vec<Batch> {
|
||||
vec![Batch {
|
||||
uid: 0,
|
||||
details: DetailsView {
|
||||
received_documents: Some(12),
|
||||
indexed_documents: Some(Some(10)),
|
||||
..DetailsView::default()
|
||||
},
|
||||
progress: None,
|
||||
stats: BatchStats {
|
||||
total_nb_tasks: 1,
|
||||
status: maplit::btreemap! { Status::Succeeded => 1 },
|
||||
types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
|
||||
index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
|
||||
progress_trace: Default::default(),
|
||||
write_channel_congestion: None,
|
||||
internal_database_sizes: Default::default(),
|
||||
},
|
||||
enqueued_at: Some(BatchEnqueuedAt {
|
||||
earliest: datetime!(2022-11-11 0:00 UTC),
|
||||
oldest: datetime!(2022-11-11 0:00 UTC),
|
||||
}),
|
||||
started_at: datetime!(2022-11-20 0:00 UTC),
|
||||
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
|
||||
}]
|
||||
}
|
||||
|
||||
pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
|
||||
vec![
|
||||
(
|
||||
@@ -459,15 +421,6 @@ pub(crate) mod test {
|
||||
index.flush().unwrap();
|
||||
index.settings(&settings).unwrap();
|
||||
|
||||
// ========== pushing the batch queue
|
||||
let batches = create_test_batches();
|
||||
|
||||
let mut batch_queue = dump.create_batches_queue().unwrap();
|
||||
for batch in &batches {
|
||||
batch_queue.push_batch(batch).unwrap();
|
||||
}
|
||||
batch_queue.flush().unwrap();
|
||||
|
||||
// ========== pushing the task queue
|
||||
let tasks = create_test_tasks();
|
||||
|
||||
@@ -496,10 +449,6 @@ pub(crate) mod test {
|
||||
|
||||
dump.create_experimental_features(features).unwrap();
|
||||
|
||||
// ========== network
|
||||
let network = create_test_network();
|
||||
dump.create_network(network).unwrap();
|
||||
|
||||
// create the dump
|
||||
let mut file = tempfile::tempfile().unwrap();
|
||||
dump.persist_to(&mut file).unwrap();
|
||||
@@ -509,14 +458,7 @@ pub(crate) mod test {
|
||||
}
|
||||
|
||||
fn create_test_features() -> RuntimeTogglableFeatures {
|
||||
RuntimeTogglableFeatures::default()
|
||||
}
|
||||
|
||||
fn create_test_network() -> Network {
|
||||
Network {
|
||||
local: Some("myself".to_string()),
|
||||
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
|
||||
}
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -567,9 +509,5 @@ pub(crate) mod test {
|
||||
// ==== checking the features
|
||||
let expected = create_test_features();
|
||||
assert_eq!(dump.features().unwrap().unwrap(), expected);
|
||||
|
||||
// ==== checking the network
|
||||
let expected = create_test_network();
|
||||
assert_eq!(&expected, dump.network().unwrap().unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -196,10 +196,6 @@ impl CompatV5ToV6 {
|
||||
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Result<Option<&v6::Network>> {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub enum CompatIndexV5ToV6 {
|
||||
@@ -322,16 +318,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
v6::Settings {
|
||||
displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
|
||||
searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
|
||||
filterable_attributes: match settings.filterable_attributes {
|
||||
v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set(
|
||||
filterable_attributes
|
||||
.into_iter()
|
||||
.map(v6::FilterableAttributesRule::Field)
|
||||
.collect(),
|
||||
),
|
||||
v5::settings::Setting::Reset => v6::Setting::Reset,
|
||||
v5::settings::Setting::NotSet => v6::Setting::NotSet,
|
||||
},
|
||||
filterable_attributes: settings.filterable_attributes.into(),
|
||||
sortable_attributes: settings.sortable_attributes.into(),
|
||||
ranking_rules: {
|
||||
match settings.ranking_rules {
|
||||
|
||||
@@ -23,7 +23,6 @@ mod v6;
|
||||
pub type Document = serde_json::Map<String, serde_json::Value>;
|
||||
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum DumpReader {
|
||||
Current(V6Reader),
|
||||
Compat(CompatV5ToV6),
|
||||
@@ -102,13 +101,6 @@ impl DumpReader {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.batches()),
|
||||
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.keys()),
|
||||
@@ -122,13 +114,6 @@ impl DumpReader {
|
||||
DumpReader::Compat(compat) => compat.features(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Result<Option<&v6::Network>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.network()),
|
||||
DumpReader::Compat(compat) => compat.network(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<V6Reader> for DumpReader {
|
||||
@@ -234,10 +219,6 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -346,8 +327,10 @@ pub(crate) mod test {
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
|
||||
assert_eq!(dump.network().unwrap(), None);
|
||||
assert_eq!(
|
||||
dump.features().unwrap().unwrap(),
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -359,10 +342,6 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -394,28 +373,10 @@ pub(crate) mod test {
|
||||
|
||||
assert_eq!(test.documents().unwrap().count(), 1);
|
||||
|
||||
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn import_dump_v6_network() {
|
||||
let dump = File::open("tests/assets/v6-with-network.dump").unwrap();
|
||||
let dump = DumpReader::open(dump).unwrap();
|
||||
|
||||
// top level infos
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// network
|
||||
|
||||
let network = dump.network().unwrap().unwrap();
|
||||
insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().search_api_key.is_none(), @"true");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().url, @"http://ms-5679.example.meilisearch.io");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo");
|
||||
assert_eq!(
|
||||
dump.features().unwrap().unwrap(),
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -427,10 +388,6 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
|
||||
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -511,10 +468,6 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
|
||||
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -592,10 +545,6 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -689,10 +638,6 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -786,10 +731,6 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -866,10 +807,6 @@ pub(crate) mod test {
|
||||
assert_eq!(dump.date(), None);
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/dump/src/reader/mod.rs
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: vector_index.settings().unwrap()
|
||||
---
|
||||
{
|
||||
@@ -49,7 +49,6 @@ expression: vector_index.settings().unwrap()
|
||||
"source": "huggingFace",
|
||||
"model": "BAAI/bge-base-en-v1.5",
|
||||
"revision": "617ca489d9e86b49b8167676d8220688b99db36e",
|
||||
"pooling": "forceMean",
|
||||
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
|
||||
}
|
||||
},
|
||||
|
||||
@@ -3,7 +3,6 @@ use std::io::{BufRead, BufReader, ErrorKind};
|
||||
use std::path::Path;
|
||||
|
||||
pub use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::vector::hf::OverridePooling;
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
@@ -19,10 +18,8 @@ pub type Checked = meilisearch_types::settings::Checked;
|
||||
pub type Unchecked = meilisearch_types::settings::Unchecked;
|
||||
|
||||
pub type Task = crate::TaskDump;
|
||||
pub type Batch = meilisearch_types::batches::Batch;
|
||||
pub type Key = meilisearch_types::keys::Key;
|
||||
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
pub type Network = meilisearch_types::features::Network;
|
||||
|
||||
// ===== Other types to clarify the code of the compat module
|
||||
// everything related to the tasks
|
||||
@@ -46,17 +43,13 @@ pub type ResponseError = meilisearch_types::error::ResponseError;
|
||||
pub type Code = meilisearch_types::error::Code;
|
||||
pub type RankingRuleView = meilisearch_types::settings::RankingRuleView;
|
||||
|
||||
pub type FilterableAttributesRule = meilisearch_types::milli::FilterableAttributesRule;
|
||||
|
||||
pub struct V6Reader {
|
||||
dump: TempDir,
|
||||
instance_uid: Option<Uuid>,
|
||||
metadata: Metadata,
|
||||
tasks: BufReader<File>,
|
||||
batches: Option<BufReader<File>>,
|
||||
keys: BufReader<File>,
|
||||
features: Option<RuntimeTogglableFeatures>,
|
||||
network: Option<Network>,
|
||||
}
|
||||
|
||||
impl V6Reader {
|
||||
@@ -84,38 +77,13 @@ impl V6Reader {
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let batches = match File::open(dump.path().join("batches").join("queue.jsonl")) {
|
||||
Ok(file) => Some(BufReader::new(file)),
|
||||
// The batch file was only introduced during the v1.13, anything prior to that won't have batches
|
||||
Err(err) if err.kind() == ErrorKind::NotFound => None,
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
let network_file = match fs::read(dump.path().join("network.json")) {
|
||||
Ok(network_file) => Some(network_file),
|
||||
Err(error) => match error.kind() {
|
||||
// Allows the file to be missing, this will only result in all experimental features disabled.
|
||||
ErrorKind::NotFound => {
|
||||
debug!("`network.json` not found in dump");
|
||||
None
|
||||
}
|
||||
_ => return Err(error.into()),
|
||||
},
|
||||
};
|
||||
let network = if let Some(network_file) = network_file {
|
||||
Some(serde_json::from_reader(&*network_file)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(V6Reader {
|
||||
metadata: serde_json::from_reader(&*meta_file)?,
|
||||
instance_uid,
|
||||
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
|
||||
batches,
|
||||
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
|
||||
features,
|
||||
network,
|
||||
dump,
|
||||
})
|
||||
}
|
||||
@@ -156,7 +124,7 @@ impl V6Reader {
|
||||
&mut self,
|
||||
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
|
||||
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
|
||||
let task: Task = serde_json::from_str(&line?)?;
|
||||
let task: Task = serde_json::from_str(&line?).unwrap();
|
||||
|
||||
let update_file_path = self
|
||||
.dump
|
||||
@@ -168,7 +136,8 @@ impl V6Reader {
|
||||
if update_file_path.exists() {
|
||||
Ok((
|
||||
task,
|
||||
Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
|
||||
Some(Box::new(UpdateFile::new(&update_file_path).unwrap())
|
||||
as Box<super::UpdateFile>),
|
||||
))
|
||||
} else {
|
||||
Ok((task, None))
|
||||
@@ -176,16 +145,6 @@ impl V6Reader {
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn batches(&mut self) -> Box<dyn Iterator<Item = Result<Batch>> + '_> {
|
||||
match self.batches.as_mut() {
|
||||
Some(batches) => Box::new((batches).lines().map(|line| -> Result<_> {
|
||||
let batch = serde_json::from_str(&line?)?;
|
||||
Ok(batch)
|
||||
})),
|
||||
None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = Result<Batch>> + '_>,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
|
||||
Box::new(
|
||||
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
|
||||
@@ -195,10 +154,6 @@ impl V6Reader {
|
||||
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
|
||||
self.features
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Option<&Network> {
|
||||
self.network.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UpdateFile {
|
||||
@@ -255,29 +210,7 @@ impl V6IndexReader {
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<Settings<Checked>> {
|
||||
let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
|
||||
patch_embedders(&mut settings);
|
||||
let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
|
||||
Ok(settings.check())
|
||||
}
|
||||
}
|
||||
|
||||
fn patch_embedders(settings: &mut Settings<Unchecked>) {
|
||||
if let Setting::Set(embedders) = &mut settings.embedders {
|
||||
for settings in embedders.values_mut() {
|
||||
let Setting::Set(settings) = &mut settings.inner else {
|
||||
continue;
|
||||
};
|
||||
if settings.source != Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
settings.pooling = match settings.pooling {
|
||||
Setting::Set(pooling) => Setting::Set(pooling),
|
||||
// if the pooling for a hugging face embedder is not set, force it to `forceMean`
|
||||
// for backward compatibility with v1.13
|
||||
// dumps created in v1.14 and up will have the setting set for hugging face embedders
|
||||
Setting::Reset | Setting::NotSet => Setting::Set(OverridePooling::ForceMean),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,8 +4,7 @@ use std::path::PathBuf;
|
||||
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::settings::{Checked, Settings};
|
||||
use serde_json::{Map, Value};
|
||||
@@ -55,10 +54,6 @@ impl DumpWriter {
|
||||
TaskWriter::new(self.dir.path().join("tasks"))
|
||||
}
|
||||
|
||||
pub fn create_batches_queue(&self) -> Result<BatchWriter> {
|
||||
BatchWriter::new(self.dir.path().join("batches"))
|
||||
}
|
||||
|
||||
pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
|
||||
Ok(std::fs::write(
|
||||
self.dir.path().join("experimental-features.json"),
|
||||
@@ -66,10 +61,6 @@ impl DumpWriter {
|
||||
)?)
|
||||
}
|
||||
|
||||
pub fn create_network(&self, network: Network) -> Result<()> {
|
||||
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
|
||||
}
|
||||
|
||||
pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
|
||||
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
|
||||
let mut tar_encoder = tar::Builder::new(gz_encoder);
|
||||
@@ -93,7 +84,7 @@ impl KeyWriter {
|
||||
}
|
||||
|
||||
pub fn push_key(&mut self, key: &Key) -> Result<()> {
|
||||
serde_json::to_writer(&mut self.keys, &key)?;
|
||||
self.keys.write_all(&serde_json::to_vec(key)?)?;
|
||||
self.keys.write_all(b"\n")?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -123,7 +114,7 @@ impl TaskWriter {
|
||||
/// Pushes tasks in the dump.
|
||||
/// If the tasks has an associated `update_file` it'll use the `task_id` as its name.
|
||||
pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> {
|
||||
serde_json::to_writer(&mut self.queue, &task)?;
|
||||
self.queue.write_all(&serde_json::to_vec(task)?)?;
|
||||
self.queue.write_all(b"\n")?;
|
||||
|
||||
Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid))))
|
||||
@@ -135,30 +126,6 @@ impl TaskWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BatchWriter {
|
||||
queue: BufWriter<File>,
|
||||
}
|
||||
|
||||
impl BatchWriter {
|
||||
pub(crate) fn new(path: PathBuf) -> Result<Self> {
|
||||
std::fs::create_dir(&path)?;
|
||||
let queue = File::create(path.join("queue.jsonl"))?;
|
||||
Ok(BatchWriter { queue: BufWriter::new(queue) })
|
||||
}
|
||||
|
||||
/// Pushes batches in the dump.
|
||||
pub fn push_batch(&mut self, batch: &Batch) -> Result<()> {
|
||||
serde_json::to_writer(&mut self.queue, &batch)?;
|
||||
self.queue.write_all(b"\n")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn flush(mut self) -> Result<()> {
|
||||
self.queue.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UpdateFile {
|
||||
path: PathBuf,
|
||||
writer: Option<BufWriter<File>>,
|
||||
@@ -170,8 +137,8 @@ impl UpdateFile {
|
||||
}
|
||||
|
||||
pub fn push_document(&mut self, document: &Document) -> Result<()> {
|
||||
if let Some(mut writer) = self.writer.as_mut() {
|
||||
serde_json::to_writer(&mut writer, &document)?;
|
||||
if let Some(writer) = self.writer.as_mut() {
|
||||
writer.write_all(&serde_json::to_vec(document)?)?;
|
||||
writer.write_all(b"\n")?;
|
||||
} else {
|
||||
let file = File::create(&self.path).unwrap();
|
||||
@@ -238,8 +205,8 @@ pub(crate) mod test {
|
||||
use super::*;
|
||||
use crate::reader::Document;
|
||||
use crate::test::{
|
||||
create_test_api_keys, create_test_batches, create_test_documents, create_test_dump,
|
||||
create_test_instance_uid, create_test_settings, create_test_tasks,
|
||||
create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid,
|
||||
create_test_settings, create_test_tasks,
|
||||
};
|
||||
|
||||
fn create_directory_hierarchy(dir: &Path) -> String {
|
||||
@@ -314,10 +281,8 @@ pub(crate) mod test {
|
||||
let dump_path = dump.path();
|
||||
|
||||
// ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
|
||||
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r"
|
||||
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
|
||||
.
|
||||
├---- batches/
|
||||
│ └---- queue.jsonl
|
||||
├---- indexes/
|
||||
│ └---- doggos/
|
||||
│ │ ├---- documents.jsonl
|
||||
@@ -330,9 +295,8 @@ pub(crate) mod test {
|
||||
├---- experimental-features.json
|
||||
├---- instance_uid.uuid
|
||||
├---- keys.jsonl
|
||||
├---- metadata.json
|
||||
└---- network.json
|
||||
");
|
||||
└---- metadata.json
|
||||
"###);
|
||||
|
||||
// ==== checking the top level infos
|
||||
let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap();
|
||||
@@ -385,16 +349,6 @@ pub(crate) mod test {
|
||||
}
|
||||
}
|
||||
|
||||
// ==== checking the batch queue
|
||||
let batches_queue = fs::read_to_string(dump_path.join("batches/queue.jsonl")).unwrap();
|
||||
for (batch, expected) in batches_queue.lines().zip(create_test_batches()) {
|
||||
let mut batch = serde_json::from_str::<Batch>(batch).unwrap();
|
||||
if batch.details.settings == Some(Box::new(Settings::<Unchecked>::default())) {
|
||||
batch.details.settings = None;
|
||||
}
|
||||
assert_eq!(batch, expected, "{batch:#?}{expected:#?}");
|
||||
}
|
||||
|
||||
// ==== checking the keys
|
||||
let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap();
|
||||
for (key, expected) in keys.lines().zip(create_test_api_keys()) {
|
||||
|
||||
Binary file not shown.
@@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
|
||||
@@ -17,5 +17,4 @@ nom_locate = "4.2.0"
|
||||
unescaper = "0.1.5"
|
||||
|
||||
[dev-dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = "=1.39.0"
|
||||
insta = "1.39.0"
|
||||
|
||||
@@ -30,25 +30,6 @@ pub enum Condition<'a> {
|
||||
StartsWith { keyword: Token<'a>, word: Token<'a> },
|
||||
}
|
||||
|
||||
impl Condition<'_> {
|
||||
pub fn operator(&self) -> &str {
|
||||
match self {
|
||||
Condition::GreaterThan(_) => ">",
|
||||
Condition::GreaterThanOrEqual(_) => ">=",
|
||||
Condition::Equal(_) => "=",
|
||||
Condition::NotEqual(_) => "!=",
|
||||
Condition::Null => "IS NULL",
|
||||
Condition::Empty => "IS EMPTY",
|
||||
Condition::Exists => "EXISTS",
|
||||
Condition::LowerThan(_) => "<",
|
||||
Condition::LowerThanOrEqual(_) => "<=",
|
||||
Condition::Between { .. } => "TO",
|
||||
Condition::Contains { .. } => "CONTAINS",
|
||||
Condition::StartsWith { .. } => "STARTS WITH",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// condition = value ("==" | ">" ...) value
|
||||
pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
|
||||
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
|
||||
|
||||
@@ -179,26 +179,6 @@ impl<'a> FilterCondition<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token> + '_> {
|
||||
if depth == 0 {
|
||||
return Box::new(std::iter::empty());
|
||||
}
|
||||
match self {
|
||||
FilterCondition::Condition { fid, .. } | FilterCondition::In { fid, .. } => {
|
||||
Box::new(std::iter::once(fid))
|
||||
}
|
||||
FilterCondition::Not(filter) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
filter.fids(depth)
|
||||
}
|
||||
FilterCondition::And(subfilters) | FilterCondition::Or(subfilters) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
Box::new(subfilters.iter().flat_map(move |f| f.fids(depth)))
|
||||
}
|
||||
_ => Box::new(std::iter::empty()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the first token found at the specified depth, `None` if no token at this depth.
|
||||
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
|
||||
match self {
|
||||
@@ -998,43 +978,6 @@ pub mod tests {
|
||||
assert!(filter.token_at_depth(3).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fids() {
|
||||
let filter = Fc::parse("field = value").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field IN [1, 2, 3]").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field != value").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field1 = value1 AND field2 = value2").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 2);
|
||||
assert!(fids[0].value() == "field1");
|
||||
assert!(fids[1].value() == "field2");
|
||||
|
||||
let filter = Fc::parse("field1 = value1 OR field2 = value2").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 2);
|
||||
assert!(fids[0].value() == "field1");
|
||||
assert!(fids[1].value() == "field2");
|
||||
|
||||
let depth = 2;
|
||||
let filter =
|
||||
Fc::parse("field1 = value1 AND (field2 = value2 OR field3 = value3)").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(depth).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_from_str() {
|
||||
let s = "test string that should not be parsed";
|
||||
|
||||
@@ -11,12 +11,12 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.4.1", features = ["derive"] }
|
||||
arbitrary = { version = "1.3.2", features = ["derive"] }
|
||||
bumpalo = "3.16.0"
|
||||
clap = { version = "4.5.24", features = ["derive"] }
|
||||
clap = { version = "4.5.9", features = ["derive"] }
|
||||
either = "1.13.0"
|
||||
fastrand = "2.3.0"
|
||||
fastrand = "2.1.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tempfile = "3.10.1"
|
||||
|
||||
@@ -12,7 +12,7 @@ use milli::documents::mmap_from_objects;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::IndexerConfig;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::Index;
|
||||
use serde_json::Value;
|
||||
@@ -57,14 +57,13 @@ fn main() {
|
||||
let opt = opt.clone();
|
||||
|
||||
let handle = std::thread::spawn(move || {
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(1024 * 1024 * 1024 * 1024);
|
||||
let tempdir = match opt.path {
|
||||
Some(path) => TempDir::new_in(path).unwrap(),
|
||||
None => TempDir::new().unwrap(),
|
||||
};
|
||||
let index = Index::new(options, tempdir.path(), true).unwrap();
|
||||
let index = Index::new(options, tempdir.path()).unwrap();
|
||||
let indexer_config = IndexerConfig::default();
|
||||
|
||||
std::thread::scope(|s| {
|
||||
@@ -90,7 +89,9 @@ fn main() {
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let mut indexer = indexer::DocumentOperation::new(
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
);
|
||||
|
||||
let mut operations = Vec::new();
|
||||
for op in batch.0 {
|
||||
@@ -114,7 +115,7 @@ fn main() {
|
||||
for op in &operations {
|
||||
match op {
|
||||
Either::Left(documents) => {
|
||||
indexer.replace_documents(documents).unwrap()
|
||||
indexer.add_documents(documents).unwrap()
|
||||
}
|
||||
Either::Right(ids) => indexer.delete_documents(ids),
|
||||
}
|
||||
|
||||
@@ -11,44 +11,42 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
anyhow = "1.0.86"
|
||||
bincode = "1.3.3"
|
||||
byte-unit = "5.1.6"
|
||||
bumpalo = "3.16.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
bumparaw-collections = "0.1.2"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.3.1"
|
||||
derive_builder = "0.20.2"
|
||||
csv = "1.3.0"
|
||||
derive_builder = "0.20.0"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "2.1.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.35"
|
||||
indexmap = "2.7.0"
|
||||
flate2 = "1.0.30"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.5"
|
||||
memmap2 = "0.9.4"
|
||||
page_size = "0.6.0"
|
||||
rayon = "1.10.0"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.138", features = ["preserve_order"] }
|
||||
roaring = { version = "0.10.7", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = [
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
time = { version = "0.3.36", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
tracing = "0.1.40"
|
||||
ureq = "2.10.0"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
arroy = "0.5.0"
|
||||
big_s = "1.0.2"
|
||||
crossbeam-channel = "0.5.14"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["json", "redactions"] }
|
||||
crossbeam-channel = "0.5.13"
|
||||
insta = { version = "1.39.0", features = ["json", "redactions"] }
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
|
||||
901
crates/index-scheduler/src/autobatcher.rs
Normal file
901
crates/index-scheduler/src/autobatcher.rs
Normal file
@@ -0,0 +1,901 @@
|
||||
/*!
|
||||
The autobatcher is responsible for combining the next enqueued
|
||||
tasks affecting a single index into a [batch](crate::batch::Batch).
|
||||
|
||||
The main function of the autobatcher is [`next_autobatch`].
|
||||
*/
|
||||
|
||||
use std::ops::ControlFlow::{self, Break, Continue};
|
||||
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::{
|
||||
self, ReplaceDocuments, UpdateDocuments,
|
||||
};
|
||||
use meilisearch_types::tasks::TaskId;
|
||||
|
||||
use crate::KindWithContent;
|
||||
|
||||
/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind)
|
||||
/// for the purpose of simplifying the implementation of the autobatcher.
|
||||
///
|
||||
/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`]
|
||||
enum AutobatchKind {
|
||||
DocumentImport {
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
DocumentEdition,
|
||||
DocumentDeletion {
|
||||
by_filter: bool,
|
||||
},
|
||||
DocumentClear,
|
||||
Settings {
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
IndexCreation,
|
||||
IndexDeletion,
|
||||
IndexUpdate,
|
||||
IndexSwap,
|
||||
}
|
||||
|
||||
impl AutobatchKind {
|
||||
#[rustfmt::skip]
|
||||
fn allow_index_creation(&self) -> Option<bool> {
|
||||
match self {
|
||||
AutobatchKind::DocumentImport { allow_index_creation, .. }
|
||||
| AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
AutobatchKind::DocumentImport { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<KindWithContent> for AutobatchKind {
|
||||
fn from(kind: KindWithContent) -> Self {
|
||||
match kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
..
|
||||
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
|
||||
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
|
||||
KindWithContent::DocumentDeletion { .. } => {
|
||||
AutobatchKind::DocumentDeletion { by_filter: false }
|
||||
}
|
||||
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
|
||||
KindWithContent::DocumentDeletionByFilter { .. } => {
|
||||
AutobatchKind::DocumentDeletion { by_filter: true }
|
||||
}
|
||||
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
|
||||
AutobatchKind::Settings {
|
||||
allow_index_creation: allow_index_creation && !is_deletion,
|
||||
}
|
||||
}
|
||||
KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion,
|
||||
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
|
||||
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
|
||||
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BatchKind {
|
||||
DocumentClear {
|
||||
ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentOperation {
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<String>,
|
||||
operation_ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentEdition {
|
||||
id: TaskId,
|
||||
},
|
||||
DocumentDeletion {
|
||||
deletion_ids: Vec<TaskId>,
|
||||
includes_by_filter: bool,
|
||||
},
|
||||
ClearAndSettings {
|
||||
other: Vec<TaskId>,
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
Settings {
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
IndexDeletion {
|
||||
ids: Vec<TaskId>,
|
||||
},
|
||||
IndexCreation {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexUpdate {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexSwap {
|
||||
id: TaskId,
|
||||
},
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
#[rustfmt::skip]
|
||||
fn allow_index_creation(&self) -> Option<bool> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { allow_index_creation, .. }
|
||||
| BatchKind::ClearAndSettings { allow_index_creation, .. }
|
||||
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
/// Returns a `ControlFlow::Break` if you must stop right now.
|
||||
/// The boolean tell you if an index has been created by the batched task.
|
||||
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
|
||||
/// but false can't be returned if you needs to create an index.
|
||||
// TODO use an AutoBatchKind as input
|
||||
pub fn new(
|
||||
task_id: TaskId,
|
||||
kind: KindWithContent,
|
||||
primary_key: Option<&str>,
|
||||
) -> (ControlFlow<BatchKind, BatchKind>, bool) {
|
||||
use AutobatchKind as K;
|
||||
|
||||
match AutobatchKind::from(kind) {
|
||||
K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true),
|
||||
K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false),
|
||||
K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false),
|
||||
K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false),
|
||||
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
|
||||
K::DocumentImport { method, allow_index_creation, primary_key: pk }
|
||||
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
|
||||
{
|
||||
(
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key: pk,
|
||||
operation_ids: vec![task_id],
|
||||
}),
|
||||
allow_index_creation,
|
||||
)
|
||||
}
|
||||
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
|
||||
K::DocumentImport { method, allow_index_creation, primary_key } => (
|
||||
Break(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: vec![task_id],
|
||||
}),
|
||||
allow_index_creation,
|
||||
),
|
||||
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
|
||||
K::DocumentDeletion { by_filter: includes_by_filter } => (
|
||||
Continue(BatchKind::DocumentDeletion {
|
||||
deletion_ids: vec![task_id],
|
||||
includes_by_filter,
|
||||
}),
|
||||
false,
|
||||
),
|
||||
K::Settings { allow_index_creation } => (
|
||||
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
|
||||
allow_index_creation,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a `ControlFlow::Break` if you must stop right now.
|
||||
/// The boolean tell you if an index has been created by the batched task.
|
||||
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
|
||||
/// but false can't be returned if you needs to create an index.
|
||||
#[rustfmt::skip]
|
||||
fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow<BatchKind, BatchKind> {
|
||||
use AutobatchKind as K;
|
||||
|
||||
match (self, kind) {
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this),
|
||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
|
||||
Break(this)
|
||||
},
|
||||
// NOTE: We need to negate the whole condition since we're checking if we need to break instead of continue.
|
||||
// I wrote it this way because it's easier to understand than the other way around.
|
||||
(this, kind) if !(
|
||||
// 1. If both task don't interact with primary key -> we can continue
|
||||
(this.primary_key().is_none() && kind.primary_key().is_none()) ||
|
||||
// 2. Else ->
|
||||
(
|
||||
// 2.1 If we already have a primary-key ->
|
||||
(
|
||||
primary_key.is_some() &&
|
||||
// 2.1.1 If the task we're trying to accumulate have a pk it must be equal to our primary key
|
||||
// 2.1.2 If the task don't have a primary-key -> we can continue
|
||||
kind.primary_key().map_or(true, |pk| pk == primary_key)
|
||||
) ||
|
||||
// 2.2 If we don't have a primary-key ->
|
||||
(
|
||||
// 2.2.1 If both the batch and the task have a primary key they should be equal
|
||||
// 2.2.2 If the batch is set to Some(None), the task should be too
|
||||
// 2.2.3 If the batch is set to None -> we can continue
|
||||
this.primary_key().zip(kind.primary_key()).map_or(true, |(this, kind)| this == kind)
|
||||
)
|
||||
)
|
||||
|
||||
) // closing the negation
|
||||
|
||||
=> {
|
||||
Break(this)
|
||||
},
|
||||
// The index deletion can batch with everything but must stop after
|
||||
(
|
||||
BatchKind::DocumentClear { mut ids }
|
||||
| BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ }
|
||||
| BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids }
|
||||
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
ids.append(&mut other);
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::DocumentClear { mut ids },
|
||||
K::DocumentClear | K::DocumentDeletion { by_filter: _ },
|
||||
) => {
|
||||
ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids })
|
||||
}
|
||||
(
|
||||
this @ BatchKind::DocumentClear { .. },
|
||||
K::DocumentImport { .. } | K::Settings { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: operation_ids })
|
||||
}
|
||||
|
||||
// we can autobatch the same kind of document additions / updates
|
||||
(
|
||||
BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids },
|
||||
K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method: ReplaceDocuments,
|
||||
allow_index_creation,
|
||||
operation_ids,
|
||||
primary_key: pk,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
|
||||
K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method: UpdateDocuments,
|
||||
allow_index_creation,
|
||||
primary_key: pk,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
|
||||
K::DocumentDeletion { by_filter: false },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
// We can't batch a document operation with a delete by filter
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::DocumentDeletion { by_filter: true },
|
||||
) => {
|
||||
Break(this)
|
||||
}
|
||||
// but we can't autobatch documents if it's not the same kind
|
||||
// this match branch MUST be AFTER the previous one
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::DocumentImport { .. },
|
||||
) => Break(this),
|
||||
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::Settings { .. },
|
||||
) => Break(this),
|
||||
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: deletion_ids })
|
||||
}
|
||||
// we can't autobatch the deletion and import if the document deletion contained a filter
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true },
|
||||
K::DocumentImport { .. }
|
||||
) => Break(this),
|
||||
// we can autobatch the deletion and import if the index already exists
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
|
||||
K::DocumentImport { method, allow_index_creation, primary_key }
|
||||
) if index_already_exists => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can autobatch the deletion and import if both can't create an index
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
|
||||
K::DocumentImport { method, allow_index_creation, primary_key }
|
||||
) if !allow_index_creation => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { .. },
|
||||
K::DocumentImport { .. }
|
||||
) => {
|
||||
Break(this)
|
||||
}
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter })
|
||||
}
|
||||
(this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this),
|
||||
|
||||
(
|
||||
BatchKind::Settings { settings_ids, allow_index_creation },
|
||||
K::DocumentClear,
|
||||
) => Continue(BatchKind::ClearAndSettings {
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
other: vec![id],
|
||||
}),
|
||||
(
|
||||
this @ BatchKind::Settings { .. },
|
||||
K::DocumentImport { .. } | K::DocumentDeletion { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::Settings { mut settings_ids, allow_index_creation },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::Settings {
|
||||
allow_index_creation,
|
||||
settings_ids,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
other.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this),
|
||||
(
|
||||
BatchKind::ClearAndSettings {
|
||||
mut other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
},
|
||||
K::DocumentDeletion { .. },
|
||||
) => {
|
||||
other.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::IndexCreation { .. }
|
||||
| BatchKind::IndexDeletion { .. }
|
||||
| BatchKind::IndexUpdate { .. }
|
||||
| BatchKind::IndexSwap { .. }
|
||||
| BatchKind::DocumentEdition { .. },
|
||||
_,
|
||||
) => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a batch from an ordered list of tasks.
|
||||
///
|
||||
/// ## Preconditions
|
||||
/// 1. The tasks must be enqueued and given in the order in which they were enqueued
|
||||
/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion)
|
||||
/// 3. The tasks must all be related to the same index
|
||||
///
|
||||
/// ## Return
|
||||
/// `None` if the list of tasks is empty. Otherwise, an [`AutoBatch`] that represents
|
||||
/// a subset of the given tasks.
|
||||
pub fn autobatch(
|
||||
enqueued: Vec<(TaskId, KindWithContent)>,
|
||||
index_already_exists: bool,
|
||||
primary_key: Option<&str>,
|
||||
) -> Option<(BatchKind, bool)> {
|
||||
let mut enqueued = enqueued.into_iter();
|
||||
let (id, kind) = enqueued.next()?;
|
||||
|
||||
// index_exist will keep track of if the index should exist at this point after the tasks we batched.
|
||||
let mut index_exist = index_already_exists;
|
||||
|
||||
let (mut acc, must_create_index) = match BatchKind::new(id, kind, primary_key) {
|
||||
(Continue(acc), create) => (acc, create),
|
||||
(Break(acc), create) => return Some((acc, create)),
|
||||
};
|
||||
|
||||
// if an index has been created in the previous step we can consider it as existing.
|
||||
index_exist |= must_create_index;
|
||||
|
||||
for (id, kind) in enqueued {
|
||||
acc = match acc.accumulate(id, kind.into(), index_exist, primary_key) {
|
||||
Continue(acc) => acc,
|
||||
Break(acc) => return Some((acc, must_create_index)),
|
||||
};
|
||||
}
|
||||
|
||||
Some((acc, must_create_index))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use meilisearch_types::tasks::IndexSwap;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::*;
|
||||
use crate::debug_snapshot;
|
||||
|
||||
fn autobatch_from(
|
||||
index_already_exists: bool,
|
||||
primary_key: Option<&str>,
|
||||
input: impl IntoIterator<Item = KindWithContent>,
|
||||
) -> Option<(BatchKind, bool)> {
|
||||
autobatch(
|
||||
input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(),
|
||||
index_already_exists,
|
||||
primary_key,
|
||||
)
|
||||
}
|
||||
|
||||
fn doc_imp(
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<&str>,
|
||||
) -> KindWithContent {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: String::from("doggo"),
|
||||
primary_key: primary_key.map(|pk| pk.to_string()),
|
||||
method,
|
||||
content_file: Uuid::new_v4(),
|
||||
documents_count: 0,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_del() -> KindWithContent {
|
||||
KindWithContent::DocumentDeletion {
|
||||
index_uid: String::from("doggo"),
|
||||
documents_ids: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_del_fil() -> KindWithContent {
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
index_uid: String::from("doggo"),
|
||||
filter_expr: serde_json::json!("cuteness > 100"),
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_clr() -> KindWithContent {
|
||||
KindWithContent::DocumentClear { index_uid: String::from("doggo") }
|
||||
}
|
||||
|
||||
fn settings(allow_index_creation: bool) -> KindWithContent {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: String::from("doggo"),
|
||||
new_settings: Default::default(),
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
|
||||
fn idx_create() -> KindWithContent {
|
||||
KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None }
|
||||
}
|
||||
|
||||
fn idx_update() -> KindWithContent {
|
||||
KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None }
|
||||
}
|
||||
|
||||
fn idx_del() -> KindWithContent {
|
||||
KindWithContent::IndexDeletion { index_uid: String::from("doggo") }
|
||||
}
|
||||
|
||||
fn idx_swap() -> KindWithContent {
|
||||
KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn autobatch_simple_operation_together() {
|
||||
// we can autobatch one or multiple `ReplaceDocuments` together.
|
||||
// if the index exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// if it doesn't exists.
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
// we can autobatch one or multiple `UpdateDocuments` together.
|
||||
// if the index exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// if it doesn't exists.
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// we can autobatch one or multiple DocumentDeletion together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
|
||||
|
||||
// we can autobatch one or multiple DocumentDeletionByFilter together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
|
||||
|
||||
// we can autobatch one or multiple Settings together
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// We can autobatch document addition with document deletion
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
|
||||
// But we can't autobatch document addition with document deletion by filter
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_document_operation_dont_autobatch_with_other() {
|
||||
// addition, updates and deletion by filter can't batch together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn document_addition_doesnt_batch_with_settings() {
|
||||
// simple case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// multiple settings and doc addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
|
||||
// addition and setting unordered
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// Doesn't batch with other forbidden operations
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn clear_and_additions() {
|
||||
// these two doesn't need to batch
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
|
||||
// Basic use case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
|
||||
// This batch kind doesn't mix with other document addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
|
||||
// But you can batch multiple clear together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn clear_and_additions_and_settings() {
|
||||
// A clear don't need to autobatch the settings that happens AFTER there is no documents
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn anything_and_index_deletion() {
|
||||
// The `IndexDeletion` doesn't batch with anything that happens AFTER.
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
|
||||
// The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`.
|
||||
// First, the basic cases
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allowed_and_disallowed_index_creation() {
|
||||
// `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
// batch deletion and addition
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn autobatch_primary_key() {
|
||||
// ==> If I have a pk
|
||||
// With a single update
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// With a multiple updates
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// ==> If I don't have a pk
|
||||
// With a single update
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// With a multiple updates
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
}
|
||||
}
|
||||
1951
crates/index-scheduler/src/batch.rs
Normal file
1951
crates/index-scheduler/src/batch.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,304 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
|
||||
use dump::{KindDump, TaskDump, UpdateFile};
|
||||
use meilisearch_types::batches::{Batch, BatchId};
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{utils, Error, IndexScheduler, Result};
|
||||
|
||||
pub struct Dump<'a> {
|
||||
index_scheduler: &'a IndexScheduler,
|
||||
wtxn: RwTxn<'a>,
|
||||
|
||||
batch_to_task_mapping: HashMap<BatchId, RoaringBitmap>,
|
||||
|
||||
indexes: HashMap<String, RoaringBitmap>,
|
||||
statuses: HashMap<Status, RoaringBitmap>,
|
||||
kinds: HashMap<Kind, RoaringBitmap>,
|
||||
|
||||
batch_indexes: HashMap<String, RoaringBitmap>,
|
||||
batch_statuses: HashMap<Status, RoaringBitmap>,
|
||||
batch_kinds: HashMap<Kind, RoaringBitmap>,
|
||||
}
|
||||
|
||||
impl<'a> Dump<'a> {
|
||||
pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result<Self> {
|
||||
// While loading a dump no one should be able to access the scheduler thus I can block everything.
|
||||
let wtxn = index_scheduler.env.write_txn()?;
|
||||
|
||||
Ok(Dump {
|
||||
index_scheduler,
|
||||
wtxn,
|
||||
batch_to_task_mapping: HashMap::new(),
|
||||
indexes: HashMap::new(),
|
||||
statuses: HashMap::new(),
|
||||
kinds: HashMap::new(),
|
||||
batch_indexes: HashMap::new(),
|
||||
batch_statuses: HashMap::new(),
|
||||
batch_kinds: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Register a new batch coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_batch(&mut self, batch: Batch) -> Result<()> {
|
||||
self.index_scheduler.queue.batches.all_batches.put(&mut self.wtxn, &batch.uid, &batch)?;
|
||||
if let Some(enqueued_at) = batch.enqueued_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.enqueued_at,
|
||||
enqueued_at.earliest,
|
||||
batch.uid,
|
||||
)?;
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.enqueued_at,
|
||||
enqueued_at.oldest,
|
||||
batch.uid,
|
||||
)?;
|
||||
}
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.started_at,
|
||||
batch.started_at,
|
||||
batch.uid,
|
||||
)?;
|
||||
if let Some(finished_at) = batch.finished_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.finished_at,
|
||||
finished_at,
|
||||
batch.uid,
|
||||
)?;
|
||||
}
|
||||
|
||||
for index in batch.stats.index_uids.keys() {
|
||||
match self.batch_indexes.get_mut(index) {
|
||||
Some(bitmap) => {
|
||||
bitmap.insert(batch.uid);
|
||||
}
|
||||
None => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(batch.uid);
|
||||
self.batch_indexes.insert(index.to_string(), bitmap);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for status in batch.stats.status.keys() {
|
||||
self.batch_statuses.entry(*status).or_default().insert(batch.uid);
|
||||
}
|
||||
for kind in batch.stats.types.keys() {
|
||||
self.batch_kinds.entry(*kind).or_default().insert(batch.uid);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register a new task coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_task(
|
||||
&mut self,
|
||||
task: TaskDump,
|
||||
content_file: Option<Box<UpdateFile>>,
|
||||
) -> Result<Task> {
|
||||
let task_has_no_docs = matches!(task.kind, KindDump::DocumentImport { documents_count, .. } if documents_count == 0);
|
||||
|
||||
let content_uuid = match content_file {
|
||||
Some(content_file) if task.status == Status::Enqueued => {
|
||||
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
|
||||
let mut writer = io::BufWriter::new(file);
|
||||
for doc in content_file {
|
||||
let doc = doc?;
|
||||
serde_json::to_writer(&mut writer, &doc).map_err(|e| {
|
||||
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
|
||||
})?;
|
||||
}
|
||||
let file = writer.into_inner().map_err(|e| e.into_error())?;
|
||||
file.persist()?;
|
||||
|
||||
Some(uuid)
|
||||
}
|
||||
// If the task isn't `Enqueued` then just generate a recognisable `Uuid`
|
||||
// in case we try to open it later.
|
||||
_ if task.status != Status::Enqueued => Some(Uuid::nil()),
|
||||
None if task.status == Status::Enqueued && task_has_no_docs => {
|
||||
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
|
||||
file.persist()?;
|
||||
|
||||
Some(uuid)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let task = Task {
|
||||
uid: task.uid,
|
||||
batch_uid: task.batch_uid,
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
error: task.error,
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details,
|
||||
status: task.status,
|
||||
kind: match task.kind {
|
||||
KindDump::DocumentImport {
|
||||
primary_key,
|
||||
method,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
} => KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
method,
|
||||
content_file: content_uuid.ok_or(Error::CorruptedDump)?,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
},
|
||||
KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
|
||||
documents_ids,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::DocumentDeletionByFilter { filter } => {
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
filter_expr: filter,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
}
|
||||
}
|
||||
KindDump::DocumentEdition { filter, context, function } => {
|
||||
KindWithContent::DocumentEdition {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
filter_expr: filter,
|
||||
context,
|
||||
function,
|
||||
}
|
||||
}
|
||||
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::Settings { settings, is_deletion, allow_index_creation } => {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
new_settings: settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
KindDump::IndexDeletion => KindWithContent::IndexDeletion {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
|
||||
KindDump::TaskCancelation { query, tasks } => {
|
||||
KindWithContent::TaskCancelation { query, tasks }
|
||||
}
|
||||
KindDump::TasksDeletion { query, tasks } => {
|
||||
KindWithContent::TaskDeletion { query, tasks }
|
||||
}
|
||||
KindDump::DumpCreation { keys, instance_uid } => {
|
||||
KindWithContent::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
|
||||
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
|
||||
},
|
||||
};
|
||||
|
||||
self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
|
||||
if let Some(batch_id) = task.batch_uid {
|
||||
self.batch_to_task_mapping.entry(batch_id).or_default().insert(task.uid);
|
||||
}
|
||||
|
||||
for index in task.indexes() {
|
||||
match self.indexes.get_mut(index) {
|
||||
Some(bitmap) => {
|
||||
bitmap.insert(task.uid);
|
||||
}
|
||||
None => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(task.uid);
|
||||
self.indexes.insert(index.to_string(), bitmap);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.enqueued_at,
|
||||
task.enqueued_at,
|
||||
task.uid,
|
||||
)?;
|
||||
|
||||
// we can't override the started_at & finished_at, so we must only set it if the tasks is finished and won't change
|
||||
if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
|
||||
if let Some(started_at) = task.started_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.started_at,
|
||||
started_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.finished_at,
|
||||
finished_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.statuses.entry(task.status).or_default().insert(task.uid);
|
||||
self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Commit all the changes and exit the importing dump state
|
||||
pub fn finish(mut self) -> Result<()> {
|
||||
for (batch_id, task_ids) in self.batch_to_task_mapping {
|
||||
self.index_scheduler.queue.batch_to_tasks_mapping.put(
|
||||
&mut self.wtxn,
|
||||
&batch_id,
|
||||
&task_ids,
|
||||
)?;
|
||||
}
|
||||
|
||||
for (index, bitmap) in self.indexes {
|
||||
self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
|
||||
}
|
||||
for (status, bitmap) in self.statuses {
|
||||
self.index_scheduler.queue.tasks.put_status(&mut self.wtxn, status, &bitmap)?;
|
||||
}
|
||||
for (kind, bitmap) in self.kinds {
|
||||
self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?;
|
||||
}
|
||||
|
||||
for (index, bitmap) in self.batch_indexes {
|
||||
self.index_scheduler.queue.batches.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
|
||||
}
|
||||
for (status, bitmap) in self.batch_statuses {
|
||||
self.index_scheduler.queue.batches.put_status(&mut self.wtxn, status, &bitmap)?;
|
||||
}
|
||||
for (kind, bitmap) in self.batch_kinds {
|
||||
self.index_scheduler.queue.batches.put_kind(&mut self.wtxn, kind, &bitmap)?;
|
||||
}
|
||||
|
||||
self.wtxn.commit()?;
|
||||
self.index_scheduler.scheduler.wake_up.signal();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -109,8 +109,6 @@ pub enum Error {
|
||||
InvalidIndexUid { index_uid: String },
|
||||
#[error("Task `{0}` not found.")]
|
||||
TaskNotFound(TaskId),
|
||||
#[error("Task `{0}` does not contain any documents. Only `documentAdditionOrUpdate` tasks with the statuses `enqueued` or `processing` contain documents")]
|
||||
TaskFileNotFound(TaskId),
|
||||
#[error("Batch `{0}` not found.")]
|
||||
BatchNotFound(BatchId),
|
||||
#[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
|
||||
@@ -129,8 +127,8 @@ pub enum Error {
|
||||
_ => format!("{error}")
|
||||
})]
|
||||
Milli { error: milli::Error, index_uid: Option<String> },
|
||||
#[error("An unexpected crash occurred when processing the task: {0}")]
|
||||
ProcessBatchPanicked(String),
|
||||
#[error("An unexpected crash occurred when processing the task.")]
|
||||
ProcessBatchPanicked,
|
||||
#[error(transparent)]
|
||||
FileStore(#[from] file_store::Error),
|
||||
#[error(transparent)]
|
||||
@@ -149,9 +147,7 @@ pub enum Error {
|
||||
#[error("Corrupted task queue.")]
|
||||
CorruptedTaskQueue,
|
||||
#[error(transparent)]
|
||||
DatabaseUpgrade(Box<Self>),
|
||||
#[error(transparent)]
|
||||
UnrecoverableError(Box<Self>),
|
||||
TaskDatabaseUpdate(Box<Self>),
|
||||
#[error(transparent)]
|
||||
HeedTransaction(heed::Error),
|
||||
|
||||
@@ -191,7 +187,6 @@ impl Error {
|
||||
| Error::InvalidTaskCanceledBy { .. }
|
||||
| Error::InvalidIndexUid { .. }
|
||||
| Error::TaskNotFound(_)
|
||||
| Error::TaskFileNotFound(_)
|
||||
| Error::BatchNotFound(_)
|
||||
| Error::TaskDeletionWithEmptyQuery
|
||||
| Error::TaskCancelationWithEmptyQuery
|
||||
@@ -199,7 +194,7 @@ impl Error {
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
| Error::Milli { .. }
|
||||
| Error::ProcessBatchPanicked(_)
|
||||
| Error::ProcessBatchPanicked
|
||||
| Error::FileStore(_)
|
||||
| Error::IoError(_)
|
||||
| Error::Persist(_)
|
||||
@@ -207,8 +202,7 @@ impl Error {
|
||||
| Error::Anyhow(_) => true,
|
||||
Error::CreateBatch(_)
|
||||
| Error::CorruptedTaskQueue
|
||||
| Error::DatabaseUpgrade(_)
|
||||
| Error::UnrecoverableError(_)
|
||||
| Error::TaskDatabaseUpdate(_)
|
||||
| Error::HeedTransaction(_) => false,
|
||||
#[cfg(test)]
|
||||
Error::PlannedFailure => false,
|
||||
@@ -253,7 +247,6 @@ impl ErrorCode for Error {
|
||||
Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy,
|
||||
Error::InvalidIndexUid { .. } => Code::InvalidIndexUid,
|
||||
Error::TaskNotFound(_) => Code::TaskNotFound,
|
||||
Error::TaskFileNotFound(_) => Code::TaskFileNotFound,
|
||||
Error::BatchNotFound(_) => Code::BatchNotFound,
|
||||
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
|
||||
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
|
||||
@@ -261,7 +254,7 @@ impl ErrorCode for Error {
|
||||
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli { error, .. } => error.error_code(),
|
||||
Error::ProcessBatchPanicked(_) => Code::Internal,
|
||||
Error::ProcessBatchPanicked => Code::Internal,
|
||||
Error::Heed(e) => e.error_code(),
|
||||
Error::HeedTransaction(e) => e.error_code(),
|
||||
Error::FileStore(e) => e.error_code(),
|
||||
@@ -273,8 +266,7 @@ impl ErrorCode for Error {
|
||||
Error::Anyhow(_) => Code::Internal,
|
||||
Error::CorruptedTaskQueue => Code::Internal,
|
||||
Error::CorruptedDump => Code::Internal,
|
||||
Error::DatabaseUpgrade(_) => Code::Internal,
|
||||
Error::UnrecoverableError(_) => Code::Internal,
|
||||
Error::TaskDatabaseUpdate(_) => Code::Internal,
|
||||
Error::CreateBatch(_) => Code::Internal,
|
||||
|
||||
// This one should never be seen by the end user
|
||||
|
||||
@@ -1,29 +1,18 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn};
|
||||
|
||||
use crate::error::FeatureNotEnabledError;
|
||||
use crate::Result;
|
||||
|
||||
/// The number of database used by features
|
||||
const NUMBER_OF_DATABASES: u32 = 1;
|
||||
/// Database const names for the `FeatureData`.
|
||||
mod db_name {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
}
|
||||
|
||||
mod db_keys {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
pub const NETWORK: &str = "network";
|
||||
}
|
||||
const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct FeatureData {
|
||||
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
|
||||
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
|
||||
network: Arc<RwLock<Network>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@@ -67,6 +56,19 @@ impl RoFeatures {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.vector_store {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "vector store",
|
||||
issue_link: "https://github.com/meilisearch/product/discussions/677",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_edit_documents_by_function(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.edit_documents_by_function {
|
||||
Ok(())
|
||||
@@ -92,62 +94,17 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_network(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.network {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "network",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/805",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_get_task_documents_route(&self) -> Result<()> {
|
||||
if self.runtime.get_task_documents_route {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Getting the documents of an enqueued task",
|
||||
feature: "get task documents route",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/808",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.composite_embedders {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "composite embedders",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/816",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
env: &Env<WithoutTls>,
|
||||
wtxn: &mut RwTxn,
|
||||
instance_features: InstanceTogglableFeatures,
|
||||
) -> Result<Self> {
|
||||
let runtime_features_db =
|
||||
env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;
|
||||
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
|
||||
wtxn.commit()?;
|
||||
|
||||
let txn = env.read_txn()?;
|
||||
let persisted_features: RuntimeTogglableFeatures =
|
||||
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
|
||||
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
|
||||
metrics: metrics || persisted_features.metrics,
|
||||
@@ -156,14 +113,7 @@ impl FeatureData {
|
||||
..persisted_features
|
||||
}));
|
||||
|
||||
let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>();
|
||||
let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();
|
||||
|
||||
Ok(Self {
|
||||
persisted: runtime_features_db,
|
||||
runtime,
|
||||
network: Arc::new(RwLock::new(network)),
|
||||
})
|
||||
Ok(Self { persisted: runtime_features_db, runtime })
|
||||
}
|
||||
|
||||
pub fn put_runtime_features(
|
||||
@@ -171,7 +121,7 @@ impl FeatureData {
|
||||
mut wtxn: RwTxn,
|
||||
features: RuntimeTogglableFeatures,
|
||||
) -> Result<()> {
|
||||
self.persisted.put(&mut wtxn, db_keys::EXPERIMENTAL_FEATURES, &features)?;
|
||||
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
// safe to unwrap, the lock will only fail if:
|
||||
@@ -192,21 +142,4 @@ impl FeatureData {
|
||||
pub fn features(&self) -> RoFeatures {
|
||||
RoFeatures::new(self)
|
||||
}
|
||||
|
||||
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
|
||||
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
|
||||
&mut wtxn,
|
||||
db_keys::NETWORK,
|
||||
&new_network,
|
||||
)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
let mut network = self.network.write().unwrap();
|
||||
*network = new_network;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Network {
|
||||
Network::clone(&*self.network.read().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::env::VarError;
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
|
||||
@@ -104,7 +102,7 @@ impl ReopenableIndex {
|
||||
return Ok(());
|
||||
}
|
||||
map.unavailable.remove(&self.uuid);
|
||||
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size, false)?;
|
||||
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -173,12 +171,11 @@ impl IndexMap {
|
||||
date: Option<(OffsetDateTime, OffsetDateTime)>,
|
||||
enable_mdb_writemap: bool,
|
||||
map_size: usize,
|
||||
creation: bool,
|
||||
) -> Result<Index> {
|
||||
if !matches!(self.get_unavailable(uuid), Missing) {
|
||||
panic!("Attempt to open an index that was unavailable");
|
||||
}
|
||||
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size, creation)?;
|
||||
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
|
||||
match self.available.insert(*uuid, index.clone()) {
|
||||
InsertionOutcome::InsertedNew => (),
|
||||
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
|
||||
@@ -302,31 +299,18 @@ fn create_or_open_index(
|
||||
date: Option<(OffsetDateTime, OffsetDateTime)>,
|
||||
enable_mdb_writemap: bool,
|
||||
map_size: usize,
|
||||
creation: bool,
|
||||
) -> Result<Index> {
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(clamp_to_page_size(map_size));
|
||||
|
||||
// You can find more details about this experimental
|
||||
// environment variable on the following GitHub discussion:
|
||||
// <https://github.com/orgs/meilisearch/discussions/806>
|
||||
let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") {
|
||||
Ok(value) => u32::from_str(&value).unwrap(),
|
||||
Err(VarError::NotPresent) => 1024,
|
||||
Err(VarError::NotUnicode(value)) => panic!(
|
||||
"Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}"
|
||||
),
|
||||
};
|
||||
options.max_readers(max_readers);
|
||||
options.max_readers(1024);
|
||||
if enable_mdb_writemap {
|
||||
unsafe { options.flags(EnvFlags::WRITE_MAP) };
|
||||
}
|
||||
|
||||
if let Some((created, updated)) = date {
|
||||
Ok(Index::new_with_creation_dates(options, path, created, updated, creation)?)
|
||||
Ok(Index::new_with_creation_dates(options, path, created, updated)?)
|
||||
} else {
|
||||
Ok(Index::new(options, path, creation)?)
|
||||
Ok(Index::new(options, path)?)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -334,17 +318,17 @@ fn create_or_open_index(
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use meilisearch_types::heed::{Env, WithoutTls};
|
||||
use meilisearch_types::heed::Env;
|
||||
use meilisearch_types::Index;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::super::IndexMapper;
|
||||
use crate::test_utils::IndexSchedulerHandle;
|
||||
use crate::tests::IndexSchedulerHandle;
|
||||
use crate::utils::clamp_to_page_size;
|
||||
use crate::IndexScheduler;
|
||||
|
||||
impl IndexMapper {
|
||||
fn test() -> (Self, Env<WithoutTls>, IndexSchedulerHandle) {
|
||||
fn test() -> (Self, Env, IndexSchedulerHandle) {
|
||||
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
|
||||
(index_scheduler.index_mapper, index_scheduler.env, handle)
|
||||
}
|
||||
|
||||
@@ -4,9 +4,8 @@ use std::time::Duration;
|
||||
use std::{fs, thread};
|
||||
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::database_stats::DatabaseStats;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{FieldDistribution, Index};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -17,17 +16,12 @@ use uuid::Uuid;
|
||||
use self::index_map::IndexMap;
|
||||
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
|
||||
use crate::uuid_codec::UuidCodec;
|
||||
use crate::{Error, IndexBudget, IndexSchedulerOptions, Result};
|
||||
use crate::{Error, Result};
|
||||
|
||||
mod index_map;
|
||||
|
||||
/// The number of database used by index mapper
|
||||
const NUMBER_OF_DATABASES: u32 = 2;
|
||||
/// Database const names for the `IndexMapper`.
|
||||
mod db_name {
|
||||
pub const INDEX_MAPPING: &str = "index-mapping";
|
||||
pub const INDEX_STATS: &str = "index-stats";
|
||||
}
|
||||
const INDEX_MAPPING: &str = "index-mapping";
|
||||
const INDEX_STATS: &str = "index-stats";
|
||||
|
||||
/// Structure managing meilisearch's indexes.
|
||||
///
|
||||
@@ -99,32 +93,19 @@ pub enum IndexStatus {
|
||||
/// The statistics that can be computed from an `Index` object.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IndexStats {
|
||||
/// Stats of the documents database.
|
||||
#[serde(default)]
|
||||
pub documents_database_stats: DatabaseStats,
|
||||
|
||||
#[serde(default, skip_serializing)]
|
||||
pub number_of_documents: Option<u64>,
|
||||
|
||||
/// Number of documents in the index.
|
||||
pub number_of_documents: u64,
|
||||
/// Size taken up by the index' DB, in bytes.
|
||||
///
|
||||
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
|
||||
/// are not returned to the disk after a deletion, this number is typically larger than
|
||||
/// `used_database_size` that only includes the size of the used pages.
|
||||
pub database_size: u64,
|
||||
/// Number of embeddings in the index.
|
||||
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
|
||||
pub number_of_embeddings: Option<u64>,
|
||||
/// Number of embedded documents in the index.
|
||||
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
|
||||
pub number_of_embedded_documents: Option<u64>,
|
||||
/// Size taken by the used pages of the index' DB, in bytes.
|
||||
///
|
||||
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
|
||||
/// this value is typically smaller than `database_size`.
|
||||
pub used_database_size: u64,
|
||||
/// The primary key of the index
|
||||
pub primary_key: Option<String>,
|
||||
/// Association of every field name with the number of times it occurs in the documents.
|
||||
pub field_distribution: FieldDistribution,
|
||||
/// Creation date of the index.
|
||||
@@ -142,15 +123,10 @@ impl IndexStats {
|
||||
///
|
||||
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
|
||||
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
|
||||
let arroy_stats = index.arroy_stats(rtxn)?;
|
||||
Ok(IndexStats {
|
||||
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
|
||||
number_of_embedded_documents: Some(arroy_stats.documents.len()),
|
||||
documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(),
|
||||
number_of_documents: None,
|
||||
number_of_documents: index.number_of_documents(rtxn)?,
|
||||
database_size: index.on_disk_size()?,
|
||||
used_database_size: index.used_size()?,
|
||||
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
|
||||
field_distribution: index.field_distribution(rtxn)?,
|
||||
created_at: index.created_at(rtxn)?,
|
||||
updated_at: index.updated_at(rtxn)?,
|
||||
@@ -159,25 +135,29 @@ impl IndexStats {
|
||||
}
|
||||
|
||||
impl IndexMapper {
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
env: &Env<WithoutTls>,
|
||||
wtxn: &mut RwTxn,
|
||||
options: &IndexSchedulerOptions,
|
||||
budget: IndexBudget,
|
||||
env: &Env,
|
||||
base_path: PathBuf,
|
||||
index_base_map_size: usize,
|
||||
index_growth_amount: usize,
|
||||
index_count: usize,
|
||||
enable_mdb_writemap: bool,
|
||||
indexer_config: IndexerConfig,
|
||||
) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
|
||||
let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?;
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(Self {
|
||||
index_map: Arc::new(RwLock::new(IndexMap::new(budget.index_count))),
|
||||
index_mapping: env.create_database(wtxn, Some(db_name::INDEX_MAPPING))?,
|
||||
index_stats: env.create_database(wtxn, Some(db_name::INDEX_STATS))?,
|
||||
base_path: options.indexes_path.clone(),
|
||||
index_base_map_size: budget.map_size,
|
||||
index_growth_amount: options.index_growth_amount,
|
||||
enable_mdb_writemap: options.enable_mdb_writemap,
|
||||
indexer_config: options.indexer_config.clone(),
|
||||
index_map: Arc::new(RwLock::new(IndexMap::new(index_count))),
|
||||
index_mapping,
|
||||
index_stats,
|
||||
base_path,
|
||||
index_base_map_size,
|
||||
index_growth_amount,
|
||||
enable_mdb_writemap,
|
||||
indexer_config: Arc::new(indexer_config),
|
||||
currently_updating_index: Default::default(),
|
||||
})
|
||||
}
|
||||
@@ -214,14 +194,8 @@ impl IndexMapper {
|
||||
date,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
true,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
|
||||
self.store_stats_of(&mut wtxn, name, &stats)?;
|
||||
drop(index_rtxn);
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
@@ -413,7 +387,6 @@ impl IndexMapper {
|
||||
None,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
false,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
}
|
||||
|
||||
@@ -1,16 +1,15 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::Write;
|
||||
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Details, Kind, Status, Task};
|
||||
use meilisearch_types::versioning;
|
||||
use meilisearch_types::tasks::{Details, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::{IndexScheduler, BEI128};
|
||||
use crate::{IndexScheduler, Kind, Status, BEI128};
|
||||
|
||||
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
// Since we'll snapshot the index right afterward, we don't need to ensure it's internally consistent for every run.
|
||||
@@ -19,15 +18,41 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
scheduler.assert_internally_consistent();
|
||||
|
||||
let IndexScheduler {
|
||||
autobatching_enabled,
|
||||
cleanup_enabled: _,
|
||||
must_stop_processing: _,
|
||||
processing_tasks,
|
||||
file_store,
|
||||
env,
|
||||
version,
|
||||
queue,
|
||||
scheduler,
|
||||
all_tasks,
|
||||
all_batches,
|
||||
batch_to_tasks_mapping,
|
||||
// task reverse index
|
||||
status,
|
||||
kind,
|
||||
index_tasks,
|
||||
canceled_by,
|
||||
enqueued_at,
|
||||
started_at,
|
||||
finished_at,
|
||||
|
||||
// batch reverse index
|
||||
batch_status,
|
||||
batch_kind,
|
||||
batch_index_tasks,
|
||||
batch_enqueued_at,
|
||||
batch_started_at,
|
||||
batch_finished_at,
|
||||
|
||||
index_mapper,
|
||||
features: _,
|
||||
max_number_of_tasks: _,
|
||||
max_number_of_batched_tasks: _,
|
||||
wake_up: _,
|
||||
dumps_path: _,
|
||||
snapshots_path: _,
|
||||
auth_path: _,
|
||||
version_file_path: _,
|
||||
webhook_url: _,
|
||||
webhook_authorization_header: _,
|
||||
test_breakpoint_sdr: _,
|
||||
@@ -40,18 +65,8 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
|
||||
let mut snap = String::new();
|
||||
|
||||
let indx_sched_version = version.get_version(&rtxn).unwrap();
|
||||
let latest_version = (
|
||||
versioning::VERSION_MAJOR.parse().unwrap(),
|
||||
versioning::VERSION_MINOR.parse().unwrap(),
|
||||
versioning::VERSION_PATCH.parse().unwrap(),
|
||||
);
|
||||
if indx_sched_version != Some(latest_version) {
|
||||
snap.push_str(&format!("index scheduler running on version {indx_sched_version:?}\n"));
|
||||
}
|
||||
|
||||
let processing = processing_tasks.read().unwrap().clone();
|
||||
snap.push_str(&format!("### Autobatching Enabled = {}\n", scheduler.autobatching_enabled));
|
||||
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
|
||||
snap.push_str(&format!(
|
||||
"### Processing batch {:?}:\n",
|
||||
processing.batch.as_ref().map(|batch| batch.uid)
|
||||
@@ -64,19 +79,19 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### All Tasks:\n");
|
||||
snap.push_str(&snapshot_all_tasks(&rtxn, queue.tasks.all_tasks));
|
||||
snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Status:\n");
|
||||
snap.push_str(&snapshot_status(&rtxn, queue.tasks.status));
|
||||
snap.push_str(&snapshot_status(&rtxn, *status));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Kind:\n");
|
||||
snap.push_str(&snapshot_kind(&rtxn, queue.tasks.kind));
|
||||
snap.push_str(&snapshot_kind(&rtxn, *kind));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Index Tasks:\n");
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, queue.tasks.index_tasks));
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Index Mapper:\n");
|
||||
@@ -84,55 +99,55 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Canceled By:\n");
|
||||
snap.push_str(&snapshot_canceled_by(&rtxn, queue.tasks.canceled_by));
|
||||
snap.push_str(&snapshot_canceled_by(&rtxn, *canceled_by));
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Enqueued At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.enqueued_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Started At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.started_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *started_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Finished At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.finished_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *finished_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### All Batches:\n");
|
||||
snap.push_str(&snapshot_all_batches(&rtxn, queue.batches.all_batches));
|
||||
snap.push_str(&snapshot_all_batches(&rtxn, *all_batches));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batch to tasks mapping:\n");
|
||||
snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, queue.batch_to_tasks_mapping));
|
||||
snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, *batch_to_tasks_mapping));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Status:\n");
|
||||
snap.push_str(&snapshot_status(&rtxn, queue.batches.status));
|
||||
snap.push_str(&snapshot_status(&rtxn, *batch_status));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Kind:\n");
|
||||
snap.push_str(&snapshot_kind(&rtxn, queue.batches.kind));
|
||||
snap.push_str(&snapshot_kind(&rtxn, *batch_kind));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Index Tasks:\n");
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, queue.batches.index_tasks));
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, *batch_index_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Enqueued At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.enqueued_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_enqueued_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Started At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.started_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_started_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Finished At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.finished_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_finished_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### File Store:\n");
|
||||
snap.push_str(&snapshot_file_store(&queue.file_store));
|
||||
snap.push_str(&snapshot_file_store(file_store));
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap
|
||||
@@ -291,9 +306,6 @@ fn snapshot_details(d: &Details) -> String {
|
||||
Details::IndexSwap { swaps } => {
|
||||
format!("{{ swaps: {swaps:?} }}")
|
||||
}
|
||||
Details::UpgradeDatabase { from, to } => {
|
||||
format!("{{ from: {from:?}, to: {to:?} }}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -341,24 +353,14 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
|
||||
|
||||
pub fn snapshot_batch(batch: &Batch) -> String {
|
||||
let mut snap = String::new();
|
||||
let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
|
||||
let stats = BatchStats {
|
||||
progress_trace: Default::default(),
|
||||
internal_database_sizes: Default::default(),
|
||||
write_channel_congestion: None,
|
||||
..stats.clone()
|
||||
};
|
||||
let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch;
|
||||
if let Some(finished_at) = finished_at {
|
||||
assert!(finished_at > started_at);
|
||||
}
|
||||
let BatchEnqueuedAt { earliest, oldest } = enqueued_at.unwrap();
|
||||
assert!(*started_at > earliest);
|
||||
assert!(earliest >= oldest);
|
||||
|
||||
snap.push('{');
|
||||
snap.push_str(&format!("uid: {uid}, "));
|
||||
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
|
||||
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
|
||||
snap.push_str(&format!("stats: {}, ", serde_json::to_string(stats).unwrap()));
|
||||
snap.push('}');
|
||||
snap
|
||||
}
|
||||
@@ -371,8 +373,7 @@ pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
|
||||
let stats = mapper.stats_of(rtxn, &name).unwrap();
|
||||
s.push_str(&format!(
|
||||
"{name}: {{ number_of_documents: {}, field_distribution: {:?} }}\n",
|
||||
stats.documents_database_stats.number_of_entries(),
|
||||
stats.field_distribution
|
||||
stats.number_of_documents, stats.field_distribution
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +1,14 @@
|
||||
use std::borrow::Cow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView};
|
||||
use enum_iterator::Sequence;
|
||||
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step};
|
||||
use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::utils::ProcessingBatch;
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
#[derive(Clone)]
|
||||
pub struct ProcessingTasks {
|
||||
pub batch: Option<Arc<ProcessingBatch>>,
|
||||
/// The list of tasks ids that are currently running.
|
||||
@@ -18,7 +20,7 @@ pub struct ProcessingTasks {
|
||||
impl ProcessingTasks {
|
||||
/// Creates an empty `ProcessingAt` struct.
|
||||
pub fn new() -> ProcessingTasks {
|
||||
ProcessingTasks::default()
|
||||
ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None }
|
||||
}
|
||||
|
||||
pub fn get_progress_view(&self) -> Option<ProgressView> {
|
||||
@@ -64,13 +66,6 @@ make_enum_progress! {
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum FinalizingIndexStep {
|
||||
Committing,
|
||||
ComputingStats,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum TaskCancelationProgress {
|
||||
RetrievingTasks,
|
||||
@@ -103,7 +98,6 @@ make_enum_progress! {
|
||||
StartTheDumpCreation,
|
||||
DumpTheApiKeys,
|
||||
DumpTheTasks,
|
||||
DumpTheBatches,
|
||||
DumpTheIndexes,
|
||||
DumpTheExperimentalFeatures,
|
||||
CompressTheDump,
|
||||
@@ -179,6 +173,32 @@ make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
|
||||
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
|
||||
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
|
||||
|
||||
pub struct VariableNameStep {
|
||||
name: String,
|
||||
current: u32,
|
||||
total: u32,
|
||||
}
|
||||
|
||||
impl VariableNameStep {
|
||||
pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
|
||||
Self { name: name.into(), current, total }
|
||||
}
|
||||
}
|
||||
|
||||
impl Step for VariableNameStep {
|
||||
fn name(&self) -> Cow<'static, str> {
|
||||
self.name.clone().into()
|
||||
}
|
||||
|
||||
fn current(&self) -> u32 {
|
||||
self.current
|
||||
}
|
||||
|
||||
fn total(&self) -> u32 {
|
||||
self.total
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
@@ -1,603 +0,0 @@
|
||||
use std::collections::HashSet;
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
|
||||
use meilisearch_types::batches::{Batch, BatchId};
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, Status};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{Query, Queue};
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
insert_task_datetime, keep_ids_within_datetimes, map_bound,
|
||||
remove_n_tasks_datetime_earlier_than, remove_task_datetime, ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, Result, BEI128};
|
||||
|
||||
/// The number of database used by the batch queue
|
||||
const NUMBER_OF_DATABASES: u32 = 7;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const ALL_BATCHES: &str = "all-batches";
|
||||
|
||||
pub const BATCH_STATUS: &str = "batch-status";
|
||||
pub const BATCH_KIND: &str = "batch-kind";
|
||||
pub const BATCH_INDEX_TASKS: &str = "batch-index-tasks";
|
||||
pub const BATCH_ENQUEUED_AT: &str = "batch-enqueued-at";
|
||||
pub const BATCH_STARTED_AT: &str = "batch-started-at";
|
||||
pub const BATCH_FINISHED_AT: &str = "batch-finished-at";
|
||||
}
|
||||
|
||||
pub struct BatchQueue {
|
||||
/// Contains all the batches accessible by their Id.
|
||||
pub(crate) all_batches: Database<BEU32, SerdeJson<Batch>>,
|
||||
|
||||
/// All the batches containing a task matching the selected status.
|
||||
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the batches ids grouped by the kind of their task.
|
||||
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the batches associated to an index.
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
/// Store the batches containing tasks which were enqueued at a specific date
|
||||
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the batches containing finished tasks started at a specific date
|
||||
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the batches containing tasks finished at a specific date
|
||||
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
}
|
||||
|
||||
impl BatchQueue {
|
||||
pub(crate) fn private_clone(&self) -> BatchQueue {
|
||||
BatchQueue {
|
||||
all_batches: self.all_batches,
|
||||
status: self.status,
|
||||
kind: self.kind,
|
||||
index_tasks: self.index_tasks,
|
||||
enqueued_at: self.enqueued_at,
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub(super) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
|
||||
Ok(Self {
|
||||
all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
|
||||
status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,
|
||||
kind: env.create_database(wtxn, Some(db_name::BATCH_KIND))?,
|
||||
index_tasks: env.create_database(wtxn, Some(db_name::BATCH_INDEX_TASKS))?,
|
||||
enqueued_at: env.create_database(wtxn, Some(db_name::BATCH_ENQUEUED_AT))?,
|
||||
started_at: env.create_database(wtxn, Some(db_name::BATCH_STARTED_AT))?,
|
||||
finished_at: env.create_database(wtxn, Some(db_name::BATCH_FINISHED_AT))?,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn all_batch_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
|
||||
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
|
||||
}
|
||||
|
||||
pub(crate) fn next_batch_id(&self, rtxn: &RoTxn) -> Result<BatchId> {
|
||||
Ok(self
|
||||
.all_batches
|
||||
.remap_data_type::<DecodeIgnore>()
|
||||
.last(rtxn)?
|
||||
.map(|(k, _)| k + 1)
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn get_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<Option<Batch>> {
|
||||
Ok(self.all_batches.get(rtxn, &batch_id)?)
|
||||
}
|
||||
|
||||
/// Returns the whole set of batches that belongs to this index.
|
||||
pub(crate) fn index_batches(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
|
||||
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn update_index(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
index: &str,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut batches = self.index_batches(wtxn, index)?;
|
||||
f(&mut batches);
|
||||
if batches.is_empty() {
|
||||
self.index_tasks.delete(wtxn, index)?;
|
||||
} else {
|
||||
self.index_tasks.put(wtxn, index, &batches)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
|
||||
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.status.put(wtxn, &status, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_status(wtxn, status)?;
|
||||
f(&mut tasks);
|
||||
self.put_status(wtxn, status, &tasks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
|
||||
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.kind.put(wtxn, &kind, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_kind(wtxn, kind)?;
|
||||
f(&mut tasks);
|
||||
self.put_kind(wtxn, kind, &tasks)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> {
|
||||
let old_batch = self.all_batches.get(wtxn, &batch.uid)?;
|
||||
|
||||
self.all_batches.put(
|
||||
wtxn,
|
||||
&batch.uid,
|
||||
&Batch {
|
||||
uid: batch.uid,
|
||||
progress: None,
|
||||
details: batch.details,
|
||||
stats: batch.stats,
|
||||
started_at: batch.started_at,
|
||||
finished_at: batch.finished_at,
|
||||
enqueued_at: batch.enqueued_at,
|
||||
},
|
||||
)?;
|
||||
|
||||
// Update the statuses
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
for status in old_batch.stats.status.keys() {
|
||||
self.update_status(wtxn, *status, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for status in batch.statuses {
|
||||
self.update_status(wtxn, status, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the kinds / types
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
let kinds: HashSet<_> = old_batch.stats.types.keys().cloned().collect();
|
||||
for kind in kinds.difference(&batch.kinds) {
|
||||
self.update_kind(wtxn, *kind, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for kind in batch.kinds {
|
||||
self.update_kind(wtxn, kind, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the indexes
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
let indexes: HashSet<_> = old_batch.stats.index_uids.keys().cloned().collect();
|
||||
for index in indexes.difference(&batch.indexes) {
|
||||
self.update_index(wtxn, index, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for index in batch.indexes {
|
||||
self.update_index(wtxn, &index, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the enqueued_at: we cannot retrieve the previous enqueued at from the previous batch, and
|
||||
// must instead go through the db looking for it. We cannot look at the task contained in this batch either
|
||||
// because they may have been removed.
|
||||
// What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written
|
||||
// to the DB per batches.
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
if let Some(enqueued_at) = old_batch.enqueued_at {
|
||||
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, old_batch.uid)?;
|
||||
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, old_batch.uid)?;
|
||||
} else {
|
||||
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
|
||||
// and we still need to find the date by scrolling the database
|
||||
remove_n_tasks_datetime_earlier_than(
|
||||
wtxn,
|
||||
self.enqueued_at,
|
||||
old_batch.started_at,
|
||||
old_batch.stats.total_nb_tasks.clamp(1, 2) as usize,
|
||||
old_batch.uid,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
// A finished batch MUST contains at least one task and have an enqueued_at
|
||||
let enqueued_at = batch.enqueued_at.as_ref().unwrap();
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, batch.uid)?;
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, batch.uid)?;
|
||||
|
||||
// Update the started at and finished at
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
remove_task_datetime(wtxn, self.started_at, old_batch.started_at, old_batch.uid)?;
|
||||
if let Some(finished_at) = old_batch.finished_at {
|
||||
remove_task_datetime(wtxn, self.finished_at, finished_at, old_batch.uid)?;
|
||||
}
|
||||
}
|
||||
insert_task_datetime(wtxn, self.started_at, batch.started_at, batch.uid)?;
|
||||
insert_task_datetime(wtxn, self.finished_at, batch.finished_at.unwrap(), batch.uid)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of batches. The batches MUST exist or a
|
||||
/// `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_batches(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
tasks: impl IntoIterator<Item = BatchId>,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<Vec<Batch>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|batch_id| {
|
||||
if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
let mut batch = processing.batch.as_ref().unwrap().to_batch();
|
||||
batch.progress = processing.get_progress_view();
|
||||
Ok(batch)
|
||||
} else {
|
||||
self.get_batch(rtxn, batch_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
|
||||
}
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
/// Return the batch ids matched by the given query from the index scheduler's point of view.
|
||||
pub(crate) fn get_batch_ids(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let Query {
|
||||
limit,
|
||||
from,
|
||||
reverse,
|
||||
uids,
|
||||
batch_uids,
|
||||
statuses,
|
||||
types,
|
||||
index_uids,
|
||||
canceled_by,
|
||||
before_enqueued_at,
|
||||
after_enqueued_at,
|
||||
before_started_at,
|
||||
after_started_at,
|
||||
before_finished_at,
|
||||
after_finished_at,
|
||||
} = query;
|
||||
|
||||
let mut batches = self.batches.all_batch_ids(rtxn)?;
|
||||
if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
batches.insert(batch_id);
|
||||
}
|
||||
|
||||
if let Some(from) = from {
|
||||
let range = if reverse.unwrap_or_default() {
|
||||
u32::MIN..*from
|
||||
} else {
|
||||
from.saturating_add(1)..u32::MAX
|
||||
};
|
||||
batches.remove_range(range);
|
||||
}
|
||||
|
||||
if let Some(batch_uids) = &batch_uids {
|
||||
let batches_uids = RoaringBitmap::from_iter(batch_uids);
|
||||
batches &= batches_uids;
|
||||
}
|
||||
|
||||
if let Some(status) = &statuses {
|
||||
let mut status_batches = RoaringBitmap::new();
|
||||
for status in status {
|
||||
match status {
|
||||
// special case for Processing batches
|
||||
Status::Processing => {
|
||||
if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
status_batches.insert(batch_id);
|
||||
}
|
||||
}
|
||||
// Enqueued tasks are not stored in batches
|
||||
Status::Enqueued => (),
|
||||
status => status_batches |= &self.batches.get_status(rtxn, *status)?,
|
||||
};
|
||||
}
|
||||
if !status.contains(&Status::Processing) {
|
||||
if let Some(ref batch) = processing.batch {
|
||||
batches.remove(batch.uid);
|
||||
}
|
||||
}
|
||||
batches &= status_batches;
|
||||
}
|
||||
|
||||
if let Some(task_uids) = &uids {
|
||||
let mut batches_by_task_uids = RoaringBitmap::new();
|
||||
for task_uid in task_uids {
|
||||
if let Some(task) = self.tasks.get_task(rtxn, *task_uid)? {
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
batches_by_task_uids.insert(batch_uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
batches &= batches_by_task_uids;
|
||||
}
|
||||
|
||||
// There is no database for this query, we must retrieve the task queried by the client and ensure it's valid
|
||||
if let Some(canceled_by) = &canceled_by {
|
||||
let mut all_canceled_batches = RoaringBitmap::new();
|
||||
for cancel_uid in canceled_by {
|
||||
if let Some(task) = self.tasks.get_task(rtxn, *cancel_uid)? {
|
||||
if task.kind.as_kind() == Kind::TaskCancelation
|
||||
&& task.status == Status::Succeeded
|
||||
{
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
all_canceled_batches.insert(batch_uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if the canceled_by has been specified but no batch
|
||||
// matches then we prefer matching zero than all batches.
|
||||
if all_canceled_batches.is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
} else {
|
||||
batches &= all_canceled_batches;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(kind) = &types {
|
||||
let mut kind_batches = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_batches |= self.batches.get_kind(rtxn, *kind)?;
|
||||
if let Some(uid) = processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.and_then(|batch| batch.kinds.contains(kind).then_some(batch.uid))
|
||||
{
|
||||
kind_batches.insert(uid);
|
||||
}
|
||||
}
|
||||
batches &= &kind_batches;
|
||||
}
|
||||
|
||||
if let Some(index) = &index_uids {
|
||||
let mut index_batches = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_batches |= self.batches.index_batches(rtxn, index)?;
|
||||
if let Some(uid) = processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.and_then(|batch| batch.indexes.contains(index).then_some(batch.uid))
|
||||
{
|
||||
index_batches.insert(uid);
|
||||
}
|
||||
}
|
||||
batches &= &index_batches;
|
||||
}
|
||||
|
||||
// For the started_at filter, we need to treat the part of the batches that are processing from the part of the
|
||||
// batches that are not processing. The non-processing ones are filtered normally while the processing ones
|
||||
// are entirely removed unless the in-memory startedAt variable falls within the date filter.
|
||||
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
|
||||
batches = {
|
||||
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
|
||||
(&batches - &*processing.processing, &batches & &*processing.processing);
|
||||
|
||||
// special case for Processing batches
|
||||
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
|
||||
let mut clear_filtered_processing_batches =
|
||||
|start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| {
|
||||
let start = map_bound(start, |b| b.unix_timestamp_nanos());
|
||||
let end = map_bound(end, |b| b.unix_timestamp_nanos());
|
||||
let is_within_dates = RangeBounds::contains(
|
||||
&(start, end),
|
||||
&processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at)
|
||||
.unix_timestamp_nanos(),
|
||||
);
|
||||
if !is_within_dates {
|
||||
filtered_processing_batches.clear();
|
||||
}
|
||||
};
|
||||
match (after_started_at, before_started_at) {
|
||||
(None, None) => (),
|
||||
(None, Some(before)) => {
|
||||
clear_filtered_processing_batches(Bound::Unbounded, Bound::Excluded(*before))
|
||||
}
|
||||
(Some(after), None) => {
|
||||
clear_filtered_processing_batches(Bound::Excluded(*after), Bound::Unbounded)
|
||||
}
|
||||
(Some(after), Some(before)) => clear_filtered_processing_batches(
|
||||
Bound::Excluded(*after),
|
||||
Bound::Excluded(*before),
|
||||
),
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut filtered_non_processing_batches,
|
||||
self.batches.started_at,
|
||||
*after_started_at,
|
||||
*before_started_at,
|
||||
)?;
|
||||
filtered_non_processing_batches | filtered_processing_batches
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut batches,
|
||||
self.batches.enqueued_at,
|
||||
*after_enqueued_at,
|
||||
*before_enqueued_at,
|
||||
)?;
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut batches,
|
||||
self.batches.finished_at,
|
||||
*after_finished_at,
|
||||
*before_finished_at,
|
||||
)?;
|
||||
|
||||
if let Some(limit) = limit {
|
||||
batches = if query.reverse.unwrap_or_default() {
|
||||
batches.into_iter().take(*limit as usize).collect()
|
||||
} else {
|
||||
batches.into_iter().rev().take(*limit as usize).collect()
|
||||
};
|
||||
}
|
||||
|
||||
Ok(batches)
|
||||
}
|
||||
|
||||
/// Return the batch ids matching the query along with the total number of batches
|
||||
/// by ignoring the from and limit parameters from the user's point of view.
|
||||
///
|
||||
/// There are two differences between an internal query and a query executed by
|
||||
/// the user.
|
||||
///
|
||||
/// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
|
||||
/// with many indexes internally.
|
||||
/// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
|
||||
pub(crate) fn get_batch_ids_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<(RoaringBitmap, u64)> {
|
||||
// compute all batches matching the filter by ignoring the limits, to find the number of batches matching
|
||||
// the filter.
|
||||
// As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares
|
||||
// us from modifying the underlying implementation, and the performance remains sufficient.
|
||||
// Should this change, we would modify `get_batch_ids` to directly return the number of matching batches.
|
||||
let total_batches =
|
||||
self.get_batch_ids(rtxn, &query.clone().without_limits(), processing)?;
|
||||
let mut batches = self.get_batch_ids(rtxn, query, processing)?;
|
||||
|
||||
// If the query contains a list of index uid or there is a finite list of authorized indexes,
|
||||
// then we must exclude all the batches that only contains tasks associated to multiple indexes.
|
||||
// This works because we don't autobatch tasks associated to multiple indexes with tasks associated
|
||||
// to a single index. e.g: IndexSwap cannot be batched with IndexCreation.
|
||||
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
|
||||
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
|
||||
batches -= self.tasks.get_kind(rtxn, kind)?;
|
||||
if let Some(batch) = processing.batch.as_ref() {
|
||||
if batch.kinds.contains(&kind) {
|
||||
batches.remove(batch.uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Any batch that is internally associated with at least one authorized index
|
||||
// must be returned.
|
||||
if !filters.all_indexes_authorized() {
|
||||
let mut valid_indexes = RoaringBitmap::new();
|
||||
let mut forbidden_indexes = RoaringBitmap::new();
|
||||
|
||||
let all_indexes_iter = self.batches.index_tasks.iter(rtxn)?;
|
||||
for result in all_indexes_iter {
|
||||
let (index, index_tasks) = result?;
|
||||
if filters.is_index_authorized(index) {
|
||||
valid_indexes |= index_tasks;
|
||||
} else {
|
||||
forbidden_indexes |= index_tasks;
|
||||
}
|
||||
}
|
||||
if let Some(batch) = processing.batch.as_ref() {
|
||||
for index in &batch.indexes {
|
||||
if filters.is_index_authorized(index) {
|
||||
valid_indexes.insert(batch.uid);
|
||||
} else {
|
||||
forbidden_indexes.insert(batch.uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a batch had ONE valid task then it should be returned
|
||||
let invalid_batches = forbidden_indexes - valid_indexes;
|
||||
|
||||
batches -= invalid_batches;
|
||||
}
|
||||
|
||||
Ok((batches, total_batches.len()))
|
||||
}
|
||||
|
||||
pub(crate) fn get_batches_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<(Vec<Batch>, u64)> {
|
||||
let (batches, total) =
|
||||
self.get_batch_ids_from_authorized_indexes(rtxn, query, filters, processing)?;
|
||||
let batches = if query.reverse.unwrap_or_default() {
|
||||
Box::new(batches.into_iter()) as Box<dyn Iterator<Item = u32>>
|
||||
} else {
|
||||
Box::new(batches.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
|
||||
};
|
||||
|
||||
let batches = self.batches.get_existing_batches(
|
||||
rtxn,
|
||||
batches.take(query.limit.unwrap_or(u32::MAX) as usize),
|
||||
processing,
|
||||
)?;
|
||||
|
||||
Ok((batches, total))
|
||||
}
|
||||
}
|
||||
@@ -1,476 +0,0 @@
|
||||
use meili_snap::snapshot;
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, FailureLocation};
|
||||
use crate::{IndexScheduler, Query};
|
||||
|
||||
#[test]
|
||||
fn query_batches_from_and_limit() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = index_creation_task("doggo", "bone");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
let kind = index_creation_task("whalo", "plankton");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
let kind = index_creation_task("catto", "his_own_vomit");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
|
||||
handle.advance_n_successful_batches(3);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
|
||||
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let query = Query { limit: Some(0), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query { limit: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query { limit: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query { from: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_simple() {
|
||||
let start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
|
||||
let (mut batches, _) = index_scheduler
|
||||
.get_batches_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
assert_eq!(batches.len(), 1);
|
||||
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
|
||||
assert!(batches[0].enqueued_at.is_some());
|
||||
batches[0].enqueued_at = None;
|
||||
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
|
||||
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
|
||||
snapshot!(batch, @r#"
|
||||
{
|
||||
"uid": 0,
|
||||
"details": {
|
||||
"primaryKey": "mouse"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
"status": {
|
||||
"processing": 1
|
||||
},
|
||||
"types": {
|
||||
"indexCreation": 1
|
||||
},
|
||||
"indexUids": {
|
||||
"catto": 1
|
||||
}
|
||||
},
|
||||
"startedAt": "1970-01-01T00:00:00Z",
|
||||
"finishedAt": null,
|
||||
"enqueuedAt": null
|
||||
}
|
||||
"#);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]"); // The batches don't contains any enqueued tasks
|
||||
|
||||
let query =
|
||||
Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]"); // both enqueued and processing tasks in the first tick
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test, which should excludes the enqueued tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should excludes all of them
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should exclude the enqueued tasks and include the only processing task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-advancing-a-bit");
|
||||
|
||||
let second_start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should include all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the second part of the test and before one minute after the
|
||||
// second start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// we run the same query to verify that, and indeed find that the last task is matched
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// enqueued, succeeded, or processing tasks started after the second part of the test, should
|
||||
// again only return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
handle.advance_till([ProcessBatchFailed, AfterProcessing]);
|
||||
|
||||
// now the last task should have failed
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// so running the last query should return nothing
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![1]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with an invalid uid
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![2]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with a valid uid
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_special_rules() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
|
||||
// associated with doggo -> empty result
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
drop(rtxn);
|
||||
// We're going to advance and process all the batches for the next query to actually hit the db
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
]);
|
||||
handle.advance_one_successful_batch();
|
||||
handle.advance_n_failed_batches(2);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-processing-everything");
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
|
||||
// -> only the index creation of doggo should be returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![
|
||||
IndexUidPattern::new_unchecked("catto"),
|
||||
IndexUidPattern::new_unchecked("doggo"),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
|
||||
// -> all tasks except the swap of catto with whalo are returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// we asked for all the tasks with all index authorized -> all tasks returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,3,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_canceled_by() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
let kind = KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_string(),
|
||||
tasks: [0, 1, 2, 3].into_iter().collect(),
|
||||
};
|
||||
let task_cancelation = index_scheduler.register(kind, None, false).unwrap();
|
||||
handle.advance_n_successful_batches(1);
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// The batch zero was the index creation task, the 1 is the task cancellation
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
// Return only 1 because the user is not authorized to see task 2
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
}
|
||||
@@ -1,391 +0,0 @@
|
||||
mod batches;
|
||||
#[cfg(test)]
|
||||
mod batches_test;
|
||||
mod tasks;
|
||||
#[cfg(test)]
|
||||
mod tasks_test;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File as StdFile;
|
||||
use std::time::Duration;
|
||||
|
||||
use file_store::FileStore;
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub(crate) use self::batches::BatchQueue;
|
||||
pub(crate) use self::tasks::TaskQueue;
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
check_index_swap_validity, filter_out_references_to_newer_tasks, ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, IndexSchedulerOptions, Result, TaskId};
|
||||
|
||||
/// The number of database used by queue itself
|
||||
const NUMBER_OF_DATABASES: u32 = 1;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping";
|
||||
}
|
||||
|
||||
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
|
||||
///
|
||||
/// An empty/default query (where each field is set to `None`) matches all tasks.
|
||||
/// Each non-null field restricts the set of tasks further.
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Query {
|
||||
/// The maximum number of tasks to be matched
|
||||
pub limit: Option<u32>,
|
||||
/// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched
|
||||
pub from: Option<u32>,
|
||||
/// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`.
|
||||
pub reverse: Option<bool>,
|
||||
/// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched
|
||||
pub uids: Option<Vec<TaskId>>,
|
||||
/// The [batch ids](`meilisearch_types::batches::Batch::uid`) to be matched
|
||||
pub batch_uids: Option<Vec<BatchId>>,
|
||||
/// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasls
|
||||
pub statuses: Option<Vec<Status>>,
|
||||
/// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks.
|
||||
///
|
||||
/// The kind of a task is given by:
|
||||
/// ```
|
||||
/// # use meilisearch_types::tasks::{Task, Kind};
|
||||
/// # fn doc_func(task: Task) -> Kind {
|
||||
/// task.kind.as_kind()
|
||||
/// # }
|
||||
/// ```
|
||||
pub types: Option<Vec<Kind>>,
|
||||
/// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks
|
||||
pub index_uids: Option<Vec<String>>,
|
||||
/// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::Task::Kind::TaskCancelation) tasks
|
||||
/// that canceled the matched tasks.
|
||||
pub canceled_by: Option<Vec<TaskId>>,
|
||||
/// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field.
|
||||
pub before_enqueued_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field.
|
||||
pub after_enqueued_at: Option<OffsetDateTime>,
|
||||
/// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field.
|
||||
pub before_started_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field.
|
||||
pub after_started_at: Option<OffsetDateTime>,
|
||||
/// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
|
||||
pub before_finished_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
|
||||
pub after_finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
impl Query {
|
||||
/// Return `true` if every field of the query is set to `None`, such that the query
|
||||
/// matches all tasks.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Query {
|
||||
limit: None,
|
||||
from: None,
|
||||
reverse: None,
|
||||
uids: None,
|
||||
batch_uids: None,
|
||||
statuses: None,
|
||||
types: None,
|
||||
index_uids: None,
|
||||
canceled_by: None,
|
||||
before_enqueued_at: None,
|
||||
after_enqueued_at: None,
|
||||
before_started_at: None,
|
||||
after_started_at: None,
|
||||
before_finished_at: None,
|
||||
after_finished_at: None,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes.
|
||||
pub fn with_index(self, index_uid: String) -> Self {
|
||||
let mut index_vec = self.index_uids.unwrap_or_default();
|
||||
index_vec.push(index_uid);
|
||||
Self { index_uids: Some(index_vec), ..self }
|
||||
}
|
||||
|
||||
// Removes the `from` and `limit` restrictions from the query.
|
||||
// Useful to get the total number of tasks matching a filter.
|
||||
pub fn without_limits(self) -> Self {
|
||||
Query { limit: None, from: None, ..self }
|
||||
}
|
||||
}
|
||||
|
||||
/// Structure which holds meilisearch's indexes and schedules the tasks
|
||||
/// to be performed on them.
|
||||
pub struct Queue {
|
||||
pub(crate) tasks: tasks::TaskQueue,
|
||||
pub(crate) batches: batches::BatchQueue,
|
||||
|
||||
/// Matches a batch id with the associated task ids.
|
||||
pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>,
|
||||
|
||||
/// The list of files referenced by the tasks.
|
||||
pub(crate) file_store: FileStore,
|
||||
|
||||
/// The max number of tasks allowed before the scheduler starts to delete
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
pub(crate) fn private_clone(&self) -> Queue {
|
||||
Queue {
|
||||
tasks: self.tasks.private_clone(),
|
||||
batches: self.batches.private_clone(),
|
||||
batch_to_tasks_mapping: self.batch_to_tasks_mapping,
|
||||
file_store: self.file_store.clone(),
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
tasks::TaskQueue::nb_db() + batches::BatchQueue::nb_db() + NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
/// Create an index scheduler and start its run loop.
|
||||
pub(crate) fn new(
|
||||
env: &Env<WithoutTls>,
|
||||
wtxn: &mut RwTxn,
|
||||
options: &IndexSchedulerOptions,
|
||||
) -> Result<Self> {
|
||||
// allow unreachable_code to get rids of the warning in the case of a test build.
|
||||
Ok(Self {
|
||||
file_store: FileStore::new(&options.update_file_path)?,
|
||||
batch_to_tasks_mapping: env
|
||||
.create_database(wtxn, Some(db_name::BATCH_TO_TASKS_MAPPING))?,
|
||||
tasks: TaskQueue::new(env, wtxn)?,
|
||||
batches: BatchQueue::new(env, wtxn)?,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the whole set of tasks that belongs to this batch.
|
||||
pub(crate) fn tasks_in_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<RoaringBitmap> {
|
||||
Ok(self.batch_to_tasks_mapping.get(rtxn, &batch_id)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of tasks and edit the `ProcessingBatch` to add the given tasks.
|
||||
///
|
||||
/// The tasks MUST exist, or a `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_tasks_for_processing_batch(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
processing_batch: &mut ProcessingBatch,
|
||||
tasks: impl IntoIterator<Item = TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|task_id| {
|
||||
let mut task = self
|
||||
.tasks
|
||||
.get_task(rtxn, task_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue));
|
||||
processing_batch.processing(&mut task);
|
||||
task
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
|
||||
pub(crate) fn write_batch(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
batch: ProcessingBatch,
|
||||
tasks: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
self.batch_to_tasks_mapping.put(wtxn, &batch.uid, tasks)?;
|
||||
self.batches.write_batch(wtxn, batch)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
|
||||
match task.content_uuid() {
|
||||
Some(content_file) => self.delete_update_file(content_file),
|
||||
None => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Open and returns the task's content File.
|
||||
pub fn update_file(&self, uuid: Uuid) -> file_store::Result<StdFile> {
|
||||
self.file_store.get_update(uuid)
|
||||
}
|
||||
|
||||
/// Delete a file from the index scheduler.
|
||||
///
|
||||
/// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.
|
||||
pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> {
|
||||
Ok(self.file_store.delete(uuid)?)
|
||||
}
|
||||
|
||||
/// Create a file and register it in the index scheduler.
|
||||
///
|
||||
/// The returned file and uuid can be used to associate
|
||||
/// some data to a task. The file will be kept until
|
||||
/// the task has been fully processed.
|
||||
pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> {
|
||||
if dry_run {
|
||||
Ok((Uuid::nil(), file_store::File::dry_file()?))
|
||||
} else {
|
||||
Ok(self.file_store.new_update()?)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> {
|
||||
Ok(self.file_store.new_update_with_uuid(uuid)?)
|
||||
}
|
||||
|
||||
/// The size on disk taken by all the updates files contained in the `IndexScheduler`, in bytes.
|
||||
pub fn compute_update_file_size(&self) -> Result<u64> {
|
||||
Ok(self.file_store.compute_total_size()?)
|
||||
}
|
||||
|
||||
pub fn register(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: &KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
let next_task_id = self.tasks.next_task_id(wtxn)?;
|
||||
|
||||
if let Some(uid) = task_id {
|
||||
if uid < next_task_id {
|
||||
return Err(Error::BadTaskId { received: uid, expected: next_task_id });
|
||||
}
|
||||
}
|
||||
|
||||
let mut task = Task {
|
||||
uid: task_id.unwrap_or(next_task_id),
|
||||
// The batch is defined once we starts processing the task
|
||||
batch_uid: None,
|
||||
enqueued_at: OffsetDateTime::now_utc(),
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
error: None,
|
||||
canceled_by: None,
|
||||
details: kind.default_details(),
|
||||
status: Status::Enqueued,
|
||||
kind: kind.clone(),
|
||||
};
|
||||
// For deletion and cancelation tasks, we want to make extra sure that they
|
||||
// don't attempt to delete/cancel tasks that are newer than themselves.
|
||||
filter_out_references_to_newer_tasks(&mut task);
|
||||
// If the register task is an index swap task, verify that it is well-formed
|
||||
// (that it does not contain duplicate indexes).
|
||||
check_index_swap_validity(&task)?;
|
||||
|
||||
// At this point the task is going to be registered and no further checks will be done
|
||||
if dry_run {
|
||||
return Ok(task);
|
||||
}
|
||||
|
||||
// Get rid of the mutability.
|
||||
let task = task;
|
||||
self.tasks.register(wtxn, &task)?;
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Register a task to cleanup the task queue if needed
|
||||
pub fn cleanup_task_queue(&self, wtxn: &mut RwTxn) -> Result<()> {
|
||||
let nb_tasks = self.tasks.all_task_ids(wtxn)?.len();
|
||||
// if we have less than 1M tasks everything is fine
|
||||
if nb_tasks < self.max_number_of_tasks as u64 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let finished = self.tasks.status.get(wtxn, &Status::Succeeded)?.unwrap_or_default()
|
||||
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
|
||||
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();
|
||||
|
||||
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
|
||||
|
||||
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
|
||||
// the deletion tasks we enqueued ourselves.
|
||||
if to_delete.len() < 2 {
|
||||
tracing::warn!("The task queue is almost full, but no task can be deleted yet.");
|
||||
// the only thing we can do is hope that the user tasks are going to finish
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"The task queue is almost full. Deleting the oldest {} finished tasks.",
|
||||
to_delete.len()
|
||||
);
|
||||
|
||||
// it's safe to unwrap here because we checked the len above
|
||||
let newest_task_id = to_delete.iter().last().unwrap();
|
||||
let last_task_to_delete =
|
||||
self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
// increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date.
|
||||
let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1);
|
||||
|
||||
self.register(
|
||||
wtxn,
|
||||
&KindWithContent::TaskDeletion {
|
||||
query: format!(
|
||||
"?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled",
|
||||
delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?,
|
||||
),
|
||||
tasks: to_delete,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_stats(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
|
||||
let mut res = BTreeMap::new();
|
||||
let processing_tasks = processing.processing.len();
|
||||
|
||||
res.insert(
|
||||
"statuses".to_string(),
|
||||
enum_iterator::all::<Status>()
|
||||
.map(|s| {
|
||||
let tasks = self.tasks.get_status(rtxn, s)?.len();
|
||||
match s {
|
||||
Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)),
|
||||
Status::Processing => Ok((s.to_string(), processing_tasks)),
|
||||
s => Ok((s.to_string(), tasks)),
|
||||
}
|
||||
})
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
res.insert(
|
||||
"types".to_string(),
|
||||
enum_iterator::all::<Kind>()
|
||||
.map(|s| Ok((s.to_string(), self.tasks.get_kind(rtxn, s)?.len())))
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
res.insert(
|
||||
"indexes".to_string(),
|
||||
self.tasks
|
||||
.index_tasks
|
||||
.iter(rtxn)?
|
||||
.map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?))
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
@@ -1,541 +0,0 @@
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, Status, Task};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{Query, Queue};
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
self, insert_task_datetime, keep_ids_within_datetimes, map_bound, remove_task_datetime,
|
||||
};
|
||||
use crate::{Error, Result, TaskId, BEI128};
|
||||
|
||||
/// The number of database used by the task queue
|
||||
const NUMBER_OF_DATABASES: u32 = 8;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const ALL_TASKS: &str = "all-tasks";
|
||||
|
||||
pub const STATUS: &str = "status";
|
||||
pub const KIND: &str = "kind";
|
||||
pub const INDEX_TASKS: &str = "index-tasks";
|
||||
pub const CANCELED_BY: &str = "canceled_by";
|
||||
pub const ENQUEUED_AT: &str = "enqueued-at";
|
||||
pub const STARTED_AT: &str = "started-at";
|
||||
pub const FINISHED_AT: &str = "finished-at";
|
||||
}
|
||||
|
||||
pub struct TaskQueue {
|
||||
/// The main database, it contains all the tasks accessible by their Id.
|
||||
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
|
||||
|
||||
/// All the tasks ids grouped by their status.
|
||||
// TODO we should not be able to serialize a `Status::Processing` in this database.
|
||||
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the tasks ids grouped by their kind.
|
||||
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the tasks associated to an index.
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
/// Store the tasks that were canceled by a task uid
|
||||
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
|
||||
/// Store the task ids of tasks which were enqueued at a specific date
|
||||
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the task ids of finished tasks which started being processed at a specific date
|
||||
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the task ids of tasks which finished at a specific date
|
||||
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
}
|
||||
|
||||
impl TaskQueue {
|
||||
pub(crate) fn private_clone(&self) -> TaskQueue {
|
||||
TaskQueue {
|
||||
all_tasks: self.all_tasks,
|
||||
status: self.status,
|
||||
kind: self.kind,
|
||||
index_tasks: self.index_tasks,
|
||||
canceled_by: self.canceled_by,
|
||||
enqueued_at: self.enqueued_at,
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub(crate) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
|
||||
Ok(Self {
|
||||
all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?,
|
||||
status: env.create_database(wtxn, Some(db_name::STATUS))?,
|
||||
kind: env.create_database(wtxn, Some(db_name::KIND))?,
|
||||
index_tasks: env.create_database(wtxn, Some(db_name::INDEX_TASKS))?,
|
||||
canceled_by: env.create_database(wtxn, Some(db_name::CANCELED_BY))?,
|
||||
enqueued_at: env.create_database(wtxn, Some(db_name::ENQUEUED_AT))?,
|
||||
started_at: env.create_database(wtxn, Some(db_name::STARTED_AT))?,
|
||||
finished_at: env.create_database(wtxn, Some(db_name::FINISHED_AT))?,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
|
||||
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
|
||||
}
|
||||
|
||||
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
|
||||
Ok(self.last_task_id(rtxn)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
|
||||
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
|
||||
}
|
||||
|
||||
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
|
||||
Ok(self.all_tasks.get(rtxn, &task_id)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
let reprocessing = old_task.status != Status::Enqueued;
|
||||
|
||||
debug_assert!(old_task != *task);
|
||||
debug_assert_eq!(old_task.uid, task.uid);
|
||||
|
||||
// If we're processing a task that failed it may already contains a batch_uid
|
||||
debug_assert!(
|
||||
reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
|
||||
"\n==> old: {old_task:?}\n==> new: {task:?}"
|
||||
);
|
||||
|
||||
if old_task.status != task.status {
|
||||
self.update_status(wtxn, old_task.status, |bitmap| {
|
||||
bitmap.remove(task.uid);
|
||||
})?;
|
||||
self.update_status(wtxn, task.status, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
if old_task.kind.as_kind() != task.kind.as_kind() {
|
||||
self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| {
|
||||
bitmap.remove(task.uid);
|
||||
})?;
|
||||
self.update_kind(wtxn, task.kind.as_kind(), |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
old_task.enqueued_at, task.enqueued_at,
|
||||
"Cannot update a task's enqueued_at time"
|
||||
);
|
||||
if old_task.started_at != task.started_at {
|
||||
assert!(
|
||||
reprocessing || old_task.started_at.is_none(),
|
||||
"Cannot update a task's started_at time"
|
||||
);
|
||||
if let Some(started_at) = old_task.started_at {
|
||||
remove_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
|
||||
}
|
||||
if let Some(started_at) = task.started_at {
|
||||
insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
|
||||
}
|
||||
}
|
||||
if old_task.finished_at != task.finished_at {
|
||||
assert!(
|
||||
reprocessing || old_task.finished_at.is_none(),
|
||||
"Cannot update a task's finished_at time"
|
||||
);
|
||||
if let Some(finished_at) = old_task.finished_at {
|
||||
remove_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.all_tasks.put(wtxn, &task.uid, task)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the whole set of tasks that belongs to this index.
|
||||
pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
|
||||
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn update_index(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
index: &str,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.index_tasks(wtxn, index)?;
|
||||
f(&mut tasks);
|
||||
if tasks.is_empty() {
|
||||
self.index_tasks.delete(wtxn, index)?;
|
||||
} else {
|
||||
self.index_tasks.put(wtxn, index, &tasks)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
|
||||
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.status.put(wtxn, &status, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_status(wtxn, status)?;
|
||||
f(&mut tasks);
|
||||
self.put_status(wtxn, status, &tasks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
|
||||
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.kind.put(wtxn, &kind, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_kind(wtxn, kind)?;
|
||||
f(&mut tasks);
|
||||
self.put_kind(wtxn, kind, &tasks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
|
||||
/// `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_tasks(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
tasks: impl IntoIterator<Item = TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|task_id| {
|
||||
self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
|
||||
pub(crate) fn register(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
self.all_tasks.put(wtxn, &task.uid, task)?;
|
||||
|
||||
for index in task.indexes() {
|
||||
self.update_index(wtxn, index, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
self.update_status(wtxn, Status::Enqueued, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
|
||||
self.update_kind(wtxn, task.kind.as_kind(), |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
|
||||
utils::insert_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, task.uid)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
/// Return the task ids matched by the given query from the index scheduler's point of view.
|
||||
pub(crate) fn get_task_ids(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
processing_tasks: &ProcessingTasks,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let ProcessingTasks { batch: processing_batch, processing: processing_tasks, progress: _ } =
|
||||
processing_tasks;
|
||||
let Query {
|
||||
limit,
|
||||
from,
|
||||
reverse,
|
||||
uids,
|
||||
batch_uids,
|
||||
statuses,
|
||||
types,
|
||||
index_uids,
|
||||
canceled_by,
|
||||
before_enqueued_at,
|
||||
after_enqueued_at,
|
||||
before_started_at,
|
||||
after_started_at,
|
||||
before_finished_at,
|
||||
after_finished_at,
|
||||
} = query;
|
||||
|
||||
let mut tasks = self.tasks.all_task_ids(rtxn)?;
|
||||
|
||||
if let Some(from) = from {
|
||||
let range = if reverse.unwrap_or_default() {
|
||||
u32::MIN..*from
|
||||
} else {
|
||||
from.saturating_add(1)..u32::MAX
|
||||
};
|
||||
tasks.remove_range(range);
|
||||
}
|
||||
|
||||
if let Some(batch_uids) = batch_uids {
|
||||
let mut batch_tasks = RoaringBitmap::new();
|
||||
for batch_uid in batch_uids {
|
||||
if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
|
||||
batch_tasks |= &**processing_tasks;
|
||||
} else {
|
||||
batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
|
||||
}
|
||||
}
|
||||
tasks &= batch_tasks;
|
||||
}
|
||||
|
||||
if let Some(status) = statuses {
|
||||
let mut status_tasks = RoaringBitmap::new();
|
||||
for status in status {
|
||||
match status {
|
||||
// special case for Processing tasks
|
||||
Status::Processing => {
|
||||
status_tasks |= &**processing_tasks;
|
||||
}
|
||||
status => status_tasks |= &self.tasks.get_status(rtxn, *status)?,
|
||||
};
|
||||
}
|
||||
if !status.contains(&Status::Processing) {
|
||||
tasks -= &**processing_tasks;
|
||||
}
|
||||
tasks &= status_tasks;
|
||||
}
|
||||
|
||||
if let Some(uids) = uids {
|
||||
let uids = RoaringBitmap::from_iter(uids);
|
||||
tasks &= &uids;
|
||||
}
|
||||
|
||||
if let Some(canceled_by) = canceled_by {
|
||||
let mut all_canceled_tasks = RoaringBitmap::new();
|
||||
for cancel_task_uid in canceled_by {
|
||||
if let Some(canceled_by_uid) = self.tasks.canceled_by.get(rtxn, cancel_task_uid)? {
|
||||
all_canceled_tasks |= canceled_by_uid;
|
||||
}
|
||||
}
|
||||
|
||||
// if the canceled_by has been specified but no task
|
||||
// matches then we prefer matching zero than all tasks.
|
||||
if all_canceled_tasks.is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
} else {
|
||||
tasks &= all_canceled_tasks;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(kind) = types {
|
||||
let mut kind_tasks = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_tasks |= self.tasks.get_kind(rtxn, *kind)?;
|
||||
}
|
||||
tasks &= &kind_tasks;
|
||||
}
|
||||
|
||||
if let Some(index) = index_uids {
|
||||
let mut index_tasks = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_tasks |= self.tasks.index_tasks(rtxn, index)?;
|
||||
}
|
||||
tasks &= &index_tasks;
|
||||
}
|
||||
|
||||
// For the started_at filter, we need to treat the part of the tasks that are processing from the part of the
|
||||
// tasks that are not processing. The non-processing ones are filtered normally while the processing ones
|
||||
// are entirely removed unless the in-memory startedAt variable falls within the date filter.
|
||||
// Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
|
||||
tasks = {
|
||||
let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
|
||||
(&tasks - &**processing_tasks, &tasks & &**processing_tasks);
|
||||
|
||||
// special case for Processing tasks
|
||||
// A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
|
||||
let mut clear_filtered_processing_tasks =
|
||||
|start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| {
|
||||
let start = map_bound(start, |b| b.unix_timestamp_nanos());
|
||||
let end = map_bound(end, |b| b.unix_timestamp_nanos());
|
||||
let is_within_dates = RangeBounds::contains(
|
||||
&(start, end),
|
||||
&processing_batch
|
||||
.as_ref()
|
||||
.map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at)
|
||||
.unix_timestamp_nanos(),
|
||||
);
|
||||
if !is_within_dates {
|
||||
filtered_processing_tasks.clear();
|
||||
}
|
||||
};
|
||||
match (after_started_at, before_started_at) {
|
||||
(None, None) => (),
|
||||
(None, Some(before)) => {
|
||||
clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(*before))
|
||||
}
|
||||
(Some(after), None) => {
|
||||
clear_filtered_processing_tasks(Bound::Excluded(*after), Bound::Unbounded)
|
||||
}
|
||||
(Some(after), Some(before)) => clear_filtered_processing_tasks(
|
||||
Bound::Excluded(*after),
|
||||
Bound::Excluded(*before),
|
||||
),
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut filtered_non_processing_tasks,
|
||||
self.tasks.started_at,
|
||||
*after_started_at,
|
||||
*before_started_at,
|
||||
)?;
|
||||
filtered_non_processing_tasks | filtered_processing_tasks
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut tasks,
|
||||
self.tasks.enqueued_at,
|
||||
*after_enqueued_at,
|
||||
*before_enqueued_at,
|
||||
)?;
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut tasks,
|
||||
self.tasks.finished_at,
|
||||
*after_finished_at,
|
||||
*before_finished_at,
|
||||
)?;
|
||||
|
||||
if let Some(limit) = limit {
|
||||
tasks = if query.reverse.unwrap_or_default() {
|
||||
tasks.into_iter().take(*limit as usize).collect()
|
||||
} else {
|
||||
tasks.into_iter().rev().take(*limit as usize).collect()
|
||||
};
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
pub(crate) fn get_task_ids_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing_tasks: &ProcessingTasks,
|
||||
) -> Result<(RoaringBitmap, u64)> {
|
||||
// compute all tasks matching the filter by ignoring the limits, to find the number of tasks matching
|
||||
// the filter.
|
||||
// As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares
|
||||
// us from modifying the underlying implementation, and the performance remains sufficient.
|
||||
// Should this change, we would modify `get_task_ids` to directly return the number of matching tasks.
|
||||
let total_tasks =
|
||||
self.get_task_ids(rtxn, &query.clone().without_limits(), processing_tasks)?;
|
||||
let mut tasks = self.get_task_ids(rtxn, query, processing_tasks)?;
|
||||
|
||||
// If the query contains a list of index uid or there is a finite list of authorized indexes,
|
||||
// then we must exclude all the kinds that aren't associated to one and only one index.
|
||||
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
|
||||
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
|
||||
tasks -= self.tasks.get_kind(rtxn, kind)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Any task that is internally associated with a non-authorized index
|
||||
// must be discarded.
|
||||
if !filters.all_indexes_authorized() {
|
||||
let all_indexes_iter = self.tasks.index_tasks.iter(rtxn)?;
|
||||
for result in all_indexes_iter {
|
||||
let (index, index_tasks) = result?;
|
||||
if !filters.is_index_authorized(index) {
|
||||
tasks -= index_tasks;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((tasks, total_tasks.len()))
|
||||
}
|
||||
|
||||
pub(crate) fn get_tasks_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing_tasks: &ProcessingTasks,
|
||||
) -> Result<(Vec<Task>, u64)> {
|
||||
let (tasks, total) =
|
||||
self.get_task_ids_from_authorized_indexes(rtxn, query, filters, processing_tasks)?;
|
||||
let tasks = if query.reverse.unwrap_or_default() {
|
||||
Box::new(tasks.into_iter()) as Box<dyn Iterator<Item = u32>>
|
||||
} else {
|
||||
Box::new(tasks.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
|
||||
};
|
||||
let tasks = self
|
||||
.tasks
|
||||
.get_existing_tasks(rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?;
|
||||
|
||||
let ProcessingTasks { batch, processing, progress: _ } = processing_tasks;
|
||||
|
||||
let ret = tasks.into_iter();
|
||||
if processing.is_empty() || batch.is_none() {
|
||||
Ok((ret.collect(), total))
|
||||
} else {
|
||||
// Safe because we ensured there was a batch in the previous branch
|
||||
let batch = batch.as_ref().unwrap();
|
||||
Ok((
|
||||
ret.map(|task| {
|
||||
if processing.contains(task.uid) {
|
||||
Task {
|
||||
status: Status::Processing,
|
||||
batch_uid: Some(batch.uid),
|
||||
started_at: Some(batch.started_at),
|
||||
..task
|
||||
}
|
||||
} else {
|
||||
task
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
total,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,441 +0,0 @@
|
||||
use meili_snap::snapshot;
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, FailureLocation};
|
||||
use crate::{IndexScheduler, Query};
|
||||
|
||||
#[test]
|
||||
fn query_tasks_from_and_limit() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = index_creation_task("doggo", "bone");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
let kind = index_creation_task("whalo", "plankton");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
let kind = index_creation_task("catto", "his_own_vomit");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
|
||||
handle.advance_n_successful_batches(3);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let processing = index_scheduler.processing_tasks.read().unwrap();
|
||||
let query = Query { limit: Some(0), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query { limit: Some(1), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query { limit: Some(2), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
|
||||
let query = Query { from: Some(2), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_tasks_simple() {
|
||||
let start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick
|
||||
|
||||
let query =
|
||||
Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test, which should excludes the enqueued tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should excludes all of them
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should exclude the enqueued tasks and include the only processing task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
|
||||
let second_start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should include all tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the second part of the test and before one minute after the
|
||||
// second start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// we run the same query to verify that, and indeed find that the last task is matched
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// enqueued, succeeded, or processing tasks started after the second part of the test, should
|
||||
// again only return the last task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
handle.advance_till([ProcessBatchFailed, AfterProcessing]);
|
||||
|
||||
// now the last task should have failed
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// so running the last query should return nothing
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![1]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with an invalid uid
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![2]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with a valid uid
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_tasks_special_rules() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
|
||||
// associated with doggo -> empty result
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query::default();
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
|
||||
// -> only the index creation of doggo should be returned
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![
|
||||
IndexUidPattern::new_unchecked("catto"),
|
||||
IndexUidPattern::new_unchecked("doggo"),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
|
||||
// -> all tasks except the swap of catto with whalo are returned
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// we asked for all the tasks with all index authorized -> all tasks returned
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_tasks_canceled_by() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
let kind = KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_string(),
|
||||
tasks: [0, 1, 2, 3].into_iter().collect(),
|
||||
};
|
||||
let task_cancelation = index_scheduler.register(kind, None, false).unwrap();
|
||||
handle.advance_n_successful_batches(1);
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
let rtxn = index_scheduler.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the
|
||||
// taskCancelation itself
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// Return only 1 because the user is not authorized to see task 2
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
|
||||
}
|
||||
@@ -1,398 +0,0 @@
|
||||
use big_s::S;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_types::error::ErrorCode;
|
||||
use meilisearch_types::tasks::{KindWithContent, Status};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, replace_document_import_task};
|
||||
use crate::{IndexScheduler, Query};
|
||||
|
||||
#[test]
|
||||
fn register() {
|
||||
// In this test, the handle doesn't make any progress, we only check that the tasks are registered
|
||||
let (index_scheduler, mut _handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kinds = [
|
||||
index_creation_task("catto", "mouse"),
|
||||
replace_document_import_task("catto", None, 0, 12),
|
||||
replace_document_import_task("catto", None, 1, 50),
|
||||
replace_document_import_task("doggo", Some("bone"), 2, 5000),
|
||||
];
|
||||
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap();
|
||||
file.persist().unwrap();
|
||||
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(1).unwrap();
|
||||
file.persist().unwrap();
|
||||
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(2).unwrap();
|
||||
file.persist().unwrap();
|
||||
|
||||
for (idx, kind) in kinds.into_iter().enumerate() {
|
||||
let k = kind.as_kind();
|
||||
let task = index_scheduler.register(kind, None, false).unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
assert_eq!(task.uid, idx as u32);
|
||||
assert_eq!(task.status, Status::Enqueued);
|
||||
assert_eq!(task.kind.as_kind(), k);
|
||||
}
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_successfully_registered");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dry_run() {
|
||||
let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, None, true).unwrap();
|
||||
snapshot!(task.uid, @"0");
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), @r"
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
");
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, Some(12), true).unwrap();
|
||||
snapshot!(task.uid, @"12");
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), @r"
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_set_taskid() {
|
||||
let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(task.uid, @"0");
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, Some(12), false).unwrap();
|
||||
snapshot!(task.uid, @"12");
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let error = index_scheduler.register(kind, Some(5), false).unwrap_err();
|
||||
snapshot!(error, @"Received bad task id: 5 should be >= to 13.");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_disable_auto_deletion_of_tasks() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
config.cleanup_enabled = false;
|
||||
config.max_number_of_tasks = 2;
|
||||
None
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
|
||||
// at this point the max number of tasks is reached
|
||||
// we can still enqueue multiple tasks
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
// now we're above the max number of tasks
|
||||
// and if we try to advance in the tick function no new task deletion should be enqueued
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_deletion_of_tasks() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
config.max_number_of_tasks = 2;
|
||||
None
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
|
||||
// at this point the max number of tasks is reached
|
||||
// we can still enqueue multiple tasks
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
// now we're above the max number of tasks
|
||||
// and if we try to advance in the tick function a new task deletion should be enqueued
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
// a new task deletion has been enqueued
|
||||
handle.advance_one_successful_batch();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
handle.advance_one_successful_batch();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_task_queue_is_full() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
// that's the minimum map size possible
|
||||
config.task_db_size = 1048576 * 3;
|
||||
None
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
// on average this task takes ~600 bytes
|
||||
loop {
|
||||
let result = index_scheduler.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
);
|
||||
if result.is_err() {
|
||||
break;
|
||||
}
|
||||
handle.advance_one_failed_batch();
|
||||
}
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
// at this point the task DB shoud have reached its limit and we should not be able to register new tasks
|
||||
let result = index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap_err();
|
||||
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
|
||||
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
|
||||
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
|
||||
|
||||
// Even the task deletion that doesn't delete anything shouldn't be accepted
|
||||
let result = index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap_err();
|
||||
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
|
||||
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
|
||||
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
|
||||
|
||||
// But a task deletion that delete something should works
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
// Now we should be able to enqueue a few tasks again
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
}
|
||||
@@ -1,474 +0,0 @@
|
||||
/*!
|
||||
The autobatcher is responsible for combining the next enqueued
|
||||
tasks affecting a single index into a [batch](crate::batch::Batch).
|
||||
|
||||
The main function of the autobatcher is [`next_autobatch`].
|
||||
*/
|
||||
|
||||
use meilisearch_types::tasks::TaskId;
|
||||
use std::ops::ControlFlow::{self, Break, Continue};
|
||||
|
||||
use crate::KindWithContent;
|
||||
|
||||
/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind)
|
||||
/// for the purpose of simplifying the implementation of the autobatcher.
|
||||
///
|
||||
/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`]
|
||||
enum AutobatchKind {
|
||||
DocumentImport { allow_index_creation: bool, primary_key: Option<String> },
|
||||
DocumentEdition,
|
||||
DocumentDeletion { by_filter: bool },
|
||||
DocumentClear,
|
||||
Settings { allow_index_creation: bool },
|
||||
IndexCreation,
|
||||
IndexDeletion,
|
||||
IndexUpdate,
|
||||
IndexSwap,
|
||||
}
|
||||
|
||||
impl AutobatchKind {
|
||||
#[rustfmt::skip]
|
||||
fn allow_index_creation(&self) -> Option<bool> {
|
||||
match self {
|
||||
AutobatchKind::DocumentImport { allow_index_creation, .. }
|
||||
| AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
AutobatchKind::DocumentImport { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<KindWithContent> for AutobatchKind {
|
||||
fn from(kind: KindWithContent) -> Self {
|
||||
match kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
allow_index_creation, primary_key, ..
|
||||
} => AutobatchKind::DocumentImport { allow_index_creation, primary_key },
|
||||
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
|
||||
KindWithContent::DocumentDeletion { .. } => {
|
||||
AutobatchKind::DocumentDeletion { by_filter: false }
|
||||
}
|
||||
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
|
||||
KindWithContent::DocumentDeletionByFilter { .. } => {
|
||||
AutobatchKind::DocumentDeletion { by_filter: true }
|
||||
}
|
||||
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
|
||||
AutobatchKind::Settings {
|
||||
allow_index_creation: allow_index_creation && !is_deletion,
|
||||
}
|
||||
}
|
||||
KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion,
|
||||
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
|
||||
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
|
||||
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BatchKind {
|
||||
DocumentClear {
|
||||
ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentOperation {
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<String>,
|
||||
operation_ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentEdition {
|
||||
id: TaskId,
|
||||
},
|
||||
DocumentDeletion {
|
||||
deletion_ids: Vec<TaskId>,
|
||||
includes_by_filter: bool,
|
||||
},
|
||||
ClearAndSettings {
|
||||
other: Vec<TaskId>,
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
Settings {
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
IndexDeletion {
|
||||
ids: Vec<TaskId>,
|
||||
},
|
||||
IndexCreation {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexUpdate {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexSwap {
|
||||
id: TaskId,
|
||||
},
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
#[rustfmt::skip]
|
||||
fn allow_index_creation(&self) -> Option<bool> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { allow_index_creation, .. }
|
||||
| BatchKind::ClearAndSettings { allow_index_creation, .. }
|
||||
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
/// Returns a `ControlFlow::Break` if you must stop right now.
|
||||
/// The boolean tell you if an index has been created by the batched task.
|
||||
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
|
||||
/// but false can't be returned if you needs to create an index.
|
||||
// TODO use an AutoBatchKind as input
|
||||
pub fn new(
|
||||
task_id: TaskId,
|
||||
kind: KindWithContent,
|
||||
primary_key: Option<&str>,
|
||||
) -> (ControlFlow<BatchKind, BatchKind>, bool) {
|
||||
use AutobatchKind as K;
|
||||
|
||||
match AutobatchKind::from(kind) {
|
||||
K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true),
|
||||
K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false),
|
||||
K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false),
|
||||
K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false),
|
||||
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
|
||||
K::DocumentImport { allow_index_creation, primary_key: pk }
|
||||
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
|
||||
{
|
||||
(
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key: pk,
|
||||
operation_ids: vec![task_id],
|
||||
}),
|
||||
allow_index_creation,
|
||||
)
|
||||
}
|
||||
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
|
||||
K::DocumentImport { allow_index_creation, primary_key } => (
|
||||
Break(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: vec![task_id],
|
||||
}),
|
||||
allow_index_creation,
|
||||
),
|
||||
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
|
||||
K::DocumentDeletion { by_filter: includes_by_filter } => (
|
||||
Continue(BatchKind::DocumentDeletion {
|
||||
deletion_ids: vec![task_id],
|
||||
includes_by_filter,
|
||||
}),
|
||||
false,
|
||||
),
|
||||
K::Settings { allow_index_creation } => (
|
||||
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
|
||||
allow_index_creation,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a `ControlFlow::Break` if you must stop right now.
|
||||
/// The boolean tell you if an index has been created by the batched task.
|
||||
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
|
||||
/// but false can't be returned if you needs to create an index.
|
||||
#[rustfmt::skip]
|
||||
fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow<BatchKind, BatchKind> {
|
||||
use AutobatchKind as K;
|
||||
|
||||
match (self, kind) {
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this),
|
||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
|
||||
Break(this)
|
||||
},
|
||||
// NOTE: We need to negate the whole condition since we're checking if we need to break instead of continue.
|
||||
// I wrote it this way because it's easier to understand than the other way around.
|
||||
(this, kind) if !(
|
||||
// 1. If both task don't interact with primary key -> we can continue
|
||||
(this.primary_key().is_none() && kind.primary_key().is_none()) ||
|
||||
// 2. Else ->
|
||||
(
|
||||
// 2.1 If we already have a primary-key ->
|
||||
(
|
||||
primary_key.is_some() &&
|
||||
// 2.1.1 If the task we're trying to accumulate have a pk it must be equal to our primary key
|
||||
// 2.1.2 If the task don't have a primary-key -> we can continue
|
||||
kind.primary_key().map_or(true, |pk| pk == primary_key)
|
||||
) ||
|
||||
// 2.2 If we don't have a primary-key ->
|
||||
(
|
||||
// 2.2.1 If both the batch and the task have a primary key they should be equal
|
||||
// 2.2.2 If the batch is set to Some(None), the task should be too
|
||||
// 2.2.3 If the batch is set to None -> we can continue
|
||||
this.primary_key().zip(kind.primary_key()).map_or(true, |(this, kind)| this == kind)
|
||||
)
|
||||
)
|
||||
|
||||
) // closing the negation
|
||||
|
||||
=> {
|
||||
Break(this)
|
||||
},
|
||||
// The index deletion can batch with everything but must stop after
|
||||
(
|
||||
BatchKind::DocumentClear { mut ids }
|
||||
| BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ }
|
||||
| BatchKind::DocumentOperation { allow_index_creation: _, primary_key: _, operation_ids: mut ids }
|
||||
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
ids.append(&mut other);
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::DocumentClear { mut ids },
|
||||
K::DocumentClear | K::DocumentDeletion { by_filter: _ },
|
||||
) => {
|
||||
ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids })
|
||||
}
|
||||
(
|
||||
this @ BatchKind::DocumentClear { .. },
|
||||
K::DocumentImport { .. } | K::Settings { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::DocumentOperation { allow_index_creation: _, primary_key: _, mut operation_ids },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: operation_ids })
|
||||
}
|
||||
|
||||
// we can autobatch different kind of document operations and mix replacements with updates
|
||||
(
|
||||
BatchKind::DocumentOperation { allow_index_creation, primary_key: _, mut operation_ids },
|
||||
K::DocumentImport { primary_key: pk, .. },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
operation_ids,
|
||||
primary_key: pk,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::DocumentOperation { allow_index_creation, primary_key, mut operation_ids },
|
||||
K::DocumentDeletion { by_filter: false },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
// We can't batch a document operation with a delete by filter
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::DocumentDeletion { by_filter: true },
|
||||
) => {
|
||||
Break(this)
|
||||
}
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::Settings { .. },
|
||||
) => Break(this),
|
||||
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: deletion_ids })
|
||||
}
|
||||
// we can't autobatch the deletion and import if the document deletion contained a filter
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true },
|
||||
K::DocumentImport { .. }
|
||||
) => Break(this),
|
||||
// we can autobatch the deletion and import if the index already exists
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
|
||||
K::DocumentImport { allow_index_creation, primary_key }
|
||||
) if index_already_exists => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can autobatch the deletion and import if both can't create an index
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
|
||||
K::DocumentImport { allow_index_creation, primary_key }
|
||||
) if !allow_index_creation => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { .. },
|
||||
K::DocumentImport { .. }
|
||||
) => {
|
||||
Break(this)
|
||||
}
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter })
|
||||
}
|
||||
(this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this),
|
||||
|
||||
(
|
||||
BatchKind::Settings { settings_ids, allow_index_creation },
|
||||
K::DocumentClear,
|
||||
) => Continue(BatchKind::ClearAndSettings {
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
other: vec![id],
|
||||
}),
|
||||
(
|
||||
this @ BatchKind::Settings { .. },
|
||||
K::DocumentImport { .. } | K::DocumentDeletion { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::Settings { mut settings_ids, allow_index_creation },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::Settings {
|
||||
allow_index_creation,
|
||||
settings_ids,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
other.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this),
|
||||
(
|
||||
BatchKind::ClearAndSettings {
|
||||
mut other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
},
|
||||
K::DocumentDeletion { .. },
|
||||
) => {
|
||||
other.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::IndexCreation { .. }
|
||||
| BatchKind::IndexDeletion { .. }
|
||||
| BatchKind::IndexUpdate { .. }
|
||||
| BatchKind::IndexSwap { .. }
|
||||
| BatchKind::DocumentEdition { .. },
|
||||
_,
|
||||
) => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a batch from an ordered list of tasks.
|
||||
///
|
||||
/// ## Preconditions
|
||||
/// 1. The tasks must be enqueued and given in the order in which they were enqueued
|
||||
/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion)
|
||||
/// 3. The tasks must all be related to the same index
|
||||
///
|
||||
/// ## Return
|
||||
/// `None` if the list of tasks is empty. Otherwise, an [`AutoBatch`] that represents
|
||||
/// a subset of the given tasks.
|
||||
pub fn autobatch(
|
||||
enqueued: Vec<(TaskId, KindWithContent)>,
|
||||
index_already_exists: bool,
|
||||
primary_key: Option<&str>,
|
||||
) -> Option<(BatchKind, bool)> {
|
||||
let mut enqueued = enqueued.into_iter();
|
||||
let (id, kind) = enqueued.next()?;
|
||||
|
||||
// index_exist will keep track of if the index should exist at this point after the tasks we batched.
|
||||
let mut index_exist = index_already_exists;
|
||||
|
||||
let (mut acc, must_create_index) = match BatchKind::new(id, kind, primary_key) {
|
||||
(Continue(acc), create) => (acc, create),
|
||||
(Break(acc), create) => return Some((acc, create)),
|
||||
};
|
||||
|
||||
// if an index has been created in the previous step we can consider it as existing.
|
||||
index_exist |= must_create_index;
|
||||
|
||||
for (id, kind) in enqueued {
|
||||
acc = match acc.accumulate(id, kind.into(), index_exist, primary_key) {
|
||||
Continue(acc) => acc,
|
||||
Break(acc) => return Some((acc, must_create_index)),
|
||||
};
|
||||
}
|
||||
|
||||
Some((acc, must_create_index))
|
||||
}
|
||||
@@ -1,395 +0,0 @@
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::{
|
||||
self, ReplaceDocuments, UpdateDocuments,
|
||||
};
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
|
||||
use uuid::Uuid;
|
||||
|
||||
use self::autobatcher::{autobatch, BatchKind};
|
||||
use super::*;
|
||||
use crate::TaskId;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! debug_snapshot {
|
||||
($value:expr, @$snapshot:literal) => {{
|
||||
let value = format!("{:?}", $value);
|
||||
meili_snap::snapshot!(value, @$snapshot);
|
||||
}};
|
||||
}
|
||||
|
||||
fn autobatch_from(
|
||||
index_already_exists: bool,
|
||||
primary_key: Option<&str>,
|
||||
input: impl IntoIterator<Item = KindWithContent>,
|
||||
) -> Option<(BatchKind, bool)> {
|
||||
autobatch(
|
||||
input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(),
|
||||
index_already_exists,
|
||||
primary_key,
|
||||
)
|
||||
}
|
||||
|
||||
fn doc_imp(
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<&str>,
|
||||
) -> KindWithContent {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: String::from("doggo"),
|
||||
primary_key: primary_key.map(|pk| pk.to_string()),
|
||||
method,
|
||||
content_file: Uuid::new_v4(),
|
||||
documents_count: 0,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_del() -> KindWithContent {
|
||||
KindWithContent::DocumentDeletion {
|
||||
index_uid: String::from("doggo"),
|
||||
documents_ids: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_del_fil() -> KindWithContent {
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
index_uid: String::from("doggo"),
|
||||
filter_expr: serde_json::json!("cuteness > 100"),
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_clr() -> KindWithContent {
|
||||
KindWithContent::DocumentClear { index_uid: String::from("doggo") }
|
||||
}
|
||||
|
||||
fn settings(allow_index_creation: bool) -> KindWithContent {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: String::from("doggo"),
|
||||
new_settings: Default::default(),
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
|
||||
fn idx_create() -> KindWithContent {
|
||||
KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None }
|
||||
}
|
||||
|
||||
fn idx_update() -> KindWithContent {
|
||||
KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None }
|
||||
}
|
||||
|
||||
fn idx_del() -> KindWithContent {
|
||||
KindWithContent::IndexDeletion { index_uid: String::from("doggo") }
|
||||
}
|
||||
|
||||
fn idx_swap() -> KindWithContent {
|
||||
KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn autobatch_simple_operation_together() {
|
||||
// we can autobatch one or multiple `ReplaceDocuments` together.
|
||||
// if the index exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// if it doesn't exists.
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
// we can autobatch one or multiple `UpdateDocuments` together.
|
||||
// if the index exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// if it doesn't exists.
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// we can autobatch one or multiple DocumentDeletion together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
|
||||
|
||||
// we can autobatch one or multiple DocumentDeletionByFilter together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
|
||||
|
||||
// we can autobatch one or multiple Settings together
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// We can autobatch document addition with document deletion
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
|
||||
// But we can't autobatch document addition with document deletion by filter
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_different_document_operations_autobatch_together() {
|
||||
// addition and updates with deletion by filter can't batch together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn document_addition_doesnt_batch_with_settings() {
|
||||
// simple case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// multiple settings and doc addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
|
||||
// addition and setting unordered
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// Doesn't batch with other forbidden operations
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn clear_and_additions() {
|
||||
// these two doesn't need to batch
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
|
||||
// Basic use case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
|
||||
// This batch kind doesn't mix with other document addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
|
||||
// But you can batch multiple clear together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn clear_and_additions_and_settings() {
|
||||
// A clear don't need to autobatch the settings that happens AFTER there is no documents
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn anything_and_index_deletion() {
|
||||
// The `IndexDeletion` doesn't batch with anything that happens AFTER.
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
|
||||
// The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`.
|
||||
// First, the basic cases
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allowed_and_disallowed_index_creation() {
|
||||
// `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
// batch deletion and addition
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn autobatch_primary_key() {
|
||||
// ==> If I have a pk
|
||||
// With a single update
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// With a multiple updates
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// ==> If I don't have a pk
|
||||
// With a single update
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// With a multiple updates
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
}
|
||||
@@ -1,567 +0,0 @@
|
||||
use std::fmt;
|
||||
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::autobatcher::{self, BatchKind};
|
||||
use crate::utils::ProcessingBatch;
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
/// Represents a combination of tasks that can all be processed at the same time.
|
||||
///
|
||||
/// A batch contains the set of tasks that it represents (accessible through
|
||||
/// [`self.ids()`](Batch::ids)), as well as additional information on how to
|
||||
/// be processed.
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum Batch {
|
||||
TaskCancelation {
|
||||
/// The task cancelation itself.
|
||||
task: Task,
|
||||
},
|
||||
TaskDeletions(Vec<Task>),
|
||||
SnapshotCreation(Vec<Task>),
|
||||
Dump(Task),
|
||||
IndexOperation {
|
||||
op: IndexOperation,
|
||||
must_create_index: bool,
|
||||
},
|
||||
IndexCreation {
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
task: Task,
|
||||
},
|
||||
IndexUpdate {
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
task: Task,
|
||||
},
|
||||
IndexDeletion {
|
||||
index_uid: String,
|
||||
tasks: Vec<Task>,
|
||||
index_has_been_created: bool,
|
||||
},
|
||||
IndexSwap {
|
||||
task: Task,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum DocumentOperation {
|
||||
Replace(Uuid),
|
||||
Update(Uuid),
|
||||
Delete(Vec<String>),
|
||||
}
|
||||
|
||||
/// A [batch](Batch) that combines multiple tasks operating on an index.
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum IndexOperation {
|
||||
DocumentOperation {
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
operations: Vec<DocumentOperation>,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
DocumentEdition {
|
||||
index_uid: String,
|
||||
task: Task,
|
||||
},
|
||||
DocumentDeletion {
|
||||
index_uid: String,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
DocumentClear {
|
||||
index_uid: String,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
Settings {
|
||||
index_uid: String,
|
||||
// The boolean indicates if it's a settings deletion or creation.
|
||||
settings: Vec<(bool, Settings<Unchecked>)>,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
DocumentClearAndSetting {
|
||||
index_uid: String,
|
||||
cleared_tasks: Vec<Task>,
|
||||
|
||||
// The boolean indicates if it's a settings deletion or creation.
|
||||
settings: Vec<(bool, Settings<Unchecked>)>,
|
||||
settings_tasks: Vec<Task>,
|
||||
},
|
||||
}
|
||||
|
||||
impl Batch {
|
||||
/// Return the task ids associated with this batch.
|
||||
pub fn ids(&self) -> RoaringBitmap {
|
||||
match self {
|
||||
Batch::TaskCancelation { task, .. }
|
||||
| Batch::Dump(task)
|
||||
| Batch::IndexCreation { task, .. }
|
||||
| Batch::IndexUpdate { task, .. } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
Batch::SnapshotCreation(tasks)
|
||||
| Batch::TaskDeletions(tasks)
|
||||
| Batch::UpgradeDatabase { tasks }
|
||||
| Batch::IndexDeletion { tasks, .. } => {
|
||||
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
|
||||
}
|
||||
Batch::IndexOperation { op, .. } => match op {
|
||||
IndexOperation::DocumentOperation { tasks, .. }
|
||||
| IndexOperation::Settings { tasks, .. }
|
||||
| IndexOperation::DocumentDeletion { tasks, .. }
|
||||
| IndexOperation::DocumentClear { tasks, .. } => {
|
||||
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
|
||||
}
|
||||
IndexOperation::DocumentEdition { task, .. } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
IndexOperation::DocumentClearAndSetting {
|
||||
cleared_tasks: tasks,
|
||||
settings_tasks: other,
|
||||
..
|
||||
} => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
|
||||
},
|
||||
Batch::IndexSwap { task } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the index UID associated with this batch
|
||||
pub fn index_uid(&self) -> Option<&str> {
|
||||
use Batch::*;
|
||||
match self {
|
||||
TaskCancelation { .. }
|
||||
| TaskDeletions(_)
|
||||
| SnapshotCreation(_)
|
||||
| Dump(_)
|
||||
| UpgradeDatabase { .. }
|
||||
| IndexSwap { .. } => None,
|
||||
IndexOperation { op, .. } => Some(op.index_uid()),
|
||||
IndexCreation { index_uid, .. }
|
||||
| IndexUpdate { index_uid, .. }
|
||||
| IndexDeletion { index_uid, .. } => Some(index_uid),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Batch {
|
||||
/// A text used when we debug the profiling reports.
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let index_uid = self.index_uid();
|
||||
let tasks = self.ids();
|
||||
match self {
|
||||
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
|
||||
Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
|
||||
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
|
||||
Batch::Dump(_) => f.write_str("Dump")?,
|
||||
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
|
||||
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
|
||||
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
|
||||
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
|
||||
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
|
||||
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
|
||||
};
|
||||
match index_uid {
|
||||
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
|
||||
None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexOperation {
|
||||
pub fn index_uid(&self) -> &str {
|
||||
match self {
|
||||
IndexOperation::DocumentOperation { index_uid, .. }
|
||||
| IndexOperation::DocumentEdition { index_uid, .. }
|
||||
| IndexOperation::DocumentDeletion { index_uid, .. }
|
||||
| IndexOperation::DocumentClear { index_uid, .. }
|
||||
| IndexOperation::Settings { index_uid, .. }
|
||||
| IndexOperation::DocumentClearAndSetting { index_uid, .. } => index_uid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for IndexOperation {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
IndexOperation::DocumentOperation { .. } => {
|
||||
f.write_str("IndexOperation::DocumentOperation")
|
||||
}
|
||||
IndexOperation::DocumentEdition { .. } => {
|
||||
f.write_str("IndexOperation::DocumentEdition")
|
||||
}
|
||||
IndexOperation::DocumentDeletion { .. } => {
|
||||
f.write_str("IndexOperation::DocumentDeletion")
|
||||
}
|
||||
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
|
||||
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
|
||||
IndexOperation::DocumentClearAndSetting { .. } => {
|
||||
f.write_str("IndexOperation::DocumentClearAndSetting")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
|
||||
///
|
||||
/// ## Arguments
|
||||
/// - `rtxn`: read transaction
|
||||
/// - `index_uid`: name of the index affected by the operations of the autobatch
|
||||
/// - `batch`: the result of the autobatcher
|
||||
pub(crate) fn create_next_batch_index(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
index_uid: String,
|
||||
batch: BatchKind,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
must_create_index: bool,
|
||||
) -> Result<Option<Batch>> {
|
||||
match batch {
|
||||
BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentClear {
|
||||
tasks: self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
ids,
|
||||
)?,
|
||||
index_uid,
|
||||
},
|
||||
must_create_index,
|
||||
})),
|
||||
BatchKind::DocumentEdition { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
match &task.kind {
|
||||
KindWithContent::DocumentEdition { index_uid, .. } => {
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentEdition {
|
||||
index_uid: index_uid.clone(),
|
||||
task,
|
||||
},
|
||||
must_create_index: false,
|
||||
}))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
BatchKind::DocumentOperation { operation_ids, .. } => {
|
||||
let tasks = self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
operation_ids,
|
||||
)?;
|
||||
let primary_key = tasks
|
||||
.iter()
|
||||
.find_map(|task| match task.kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => {
|
||||
// we want to stop on the first document addition
|
||||
Some(primary_key.clone())
|
||||
}
|
||||
KindWithContent::DocumentDeletion { .. } => None,
|
||||
_ => unreachable!(),
|
||||
})
|
||||
.flatten();
|
||||
|
||||
let mut operations = Vec::new();
|
||||
|
||||
for task in tasks.iter() {
|
||||
match task.kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
content_file, method, ..
|
||||
} => match method {
|
||||
IndexDocumentsMethod::ReplaceDocuments => {
|
||||
operations.push(DocumentOperation::Replace(content_file))
|
||||
}
|
||||
IndexDocumentsMethod::UpdateDocuments => {
|
||||
operations.push(DocumentOperation::Update(content_file))
|
||||
}
|
||||
_ => unreachable!("Unknown document merging method"),
|
||||
},
|
||||
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
|
||||
operations.push(DocumentOperation::Delete(documents_ids.clone()));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentOperation {
|
||||
index_uid,
|
||||
primary_key,
|
||||
operations,
|
||||
tasks,
|
||||
},
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => {
|
||||
let tasks = self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
deletion_ids,
|
||||
)?;
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentDeletion { index_uid, tasks },
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::Settings { settings_ids, .. } => {
|
||||
let tasks = self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
settings_ids,
|
||||
)?;
|
||||
|
||||
let mut settings = Vec::new();
|
||||
for task in &tasks {
|
||||
match task.kind {
|
||||
KindWithContent::SettingsUpdate {
|
||||
ref new_settings, is_deletion, ..
|
||||
} => settings.push((is_deletion, *new_settings.clone())),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::Settings { index_uid, settings, tasks },
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => {
|
||||
let (index_uid, settings, settings_tasks) = match self
|
||||
.create_next_batch_index(
|
||||
rtxn,
|
||||
index_uid,
|
||||
BatchKind::Settings { settings_ids, allow_index_creation },
|
||||
current_batch,
|
||||
must_create_index,
|
||||
)?
|
||||
.unwrap()
|
||||
{
|
||||
Batch::IndexOperation {
|
||||
op: IndexOperation::Settings { index_uid, settings, tasks, .. },
|
||||
..
|
||||
} => (index_uid, settings, tasks),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let (index_uid, cleared_tasks) = match self
|
||||
.create_next_batch_index(
|
||||
rtxn,
|
||||
index_uid,
|
||||
BatchKind::DocumentClear { ids: other },
|
||||
current_batch,
|
||||
must_create_index,
|
||||
)?
|
||||
.unwrap()
|
||||
{
|
||||
Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentClear { index_uid, tasks },
|
||||
..
|
||||
} => (index_uid, tasks),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentClearAndSetting {
|
||||
index_uid,
|
||||
cleared_tasks,
|
||||
settings,
|
||||
settings_tasks,
|
||||
},
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::IndexCreation { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
let (index_uid, primary_key) = match &task.kind {
|
||||
KindWithContent::IndexCreation { index_uid, primary_key } => {
|
||||
(index_uid.clone(), primary_key.clone())
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Ok(Some(Batch::IndexCreation { index_uid, primary_key, task }))
|
||||
}
|
||||
BatchKind::IndexUpdate { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
let primary_key = match &task.kind {
|
||||
KindWithContent::IndexUpdate { primary_key, .. } => primary_key.clone(),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task }))
|
||||
}
|
||||
BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion {
|
||||
index_uid,
|
||||
index_has_been_created: must_create_index,
|
||||
tasks: self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
ids,
|
||||
)?,
|
||||
})),
|
||||
BatchKind::IndexSwap { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
Ok(Some(Batch::IndexSwap { task }))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create the next batch to be processed;
|
||||
/// 1. We get the *last* task to cancel.
|
||||
/// 2. We get the *next* task to delete.
|
||||
/// 3. We get the *next* snapshot to process.
|
||||
/// 4. We get the *next* dump to process.
|
||||
/// 5. We get the *next* tasks to process for a specific index.
|
||||
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
|
||||
pub(crate) fn create_next_batch(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
) -> Result<Option<(Batch, ProcessingBatch)>> {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;
|
||||
|
||||
let batch_id = self.queue.batches.next_batch_id(rtxn)?;
|
||||
let mut current_batch = ProcessingBatch::new(batch_id);
|
||||
|
||||
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
|
||||
let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?;
|
||||
|
||||
// 0. The priority over everything is to upgrade the instance
|
||||
// There shouldn't be multiple upgrade tasks but just in case we're going to batch all of them at the same time
|
||||
let upgrade = self.queue.tasks.get_kind(rtxn, Kind::UpgradeDatabase)? & (enqueued | failed);
|
||||
if !upgrade.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, upgrade)?;
|
||||
// In the case of an upgrade database batch, we want to find back the original batch that tried processing it
|
||||
// and re-use its id
|
||||
if let Some(batch_uid) = tasks.last().unwrap().batch_uid {
|
||||
current_batch.uid = batch_uid;
|
||||
}
|
||||
current_batch.processing(&mut tasks);
|
||||
return Ok(Some((Batch::UpgradeDatabase { tasks }, current_batch)));
|
||||
}
|
||||
|
||||
// 1. we get the last task to cancel.
|
||||
let to_cancel = self.queue.tasks.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
|
||||
if let Some(task_id) = to_cancel.max() {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
return Ok(Some((Batch::TaskCancelation { task }, current_batch)));
|
||||
}
|
||||
|
||||
// 2. we get the next task to delete
|
||||
let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
|
||||
if !to_delete.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?;
|
||||
current_batch.processing(&mut tasks);
|
||||
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 3. we batch the snapshot.
|
||||
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
|
||||
if !to_snapshot.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
|
||||
current_batch.processing(&mut tasks);
|
||||
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the dumps.
|
||||
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
|
||||
if let Some(to_dump) = to_dump.min() {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
return Ok(Some((Batch::Dump(task), current_batch)));
|
||||
}
|
||||
|
||||
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
// If the task is not associated with any index, verify that it is an index swap and
|
||||
// create the batch directly. Otherwise, get the index name associated with the task
|
||||
// and use the autobatcher to batch the enqueued tasks associated with it
|
||||
|
||||
let index_name = if let Some(&index_name) = task.indexes().first() {
|
||||
index_name
|
||||
} else {
|
||||
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
|
||||
current_batch.processing(Some(&mut task));
|
||||
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
|
||||
};
|
||||
|
||||
let index_already_exists = self.index_mapper.exists(rtxn, index_name)?;
|
||||
let mut primary_key = None;
|
||||
if index_already_exists {
|
||||
let index = self.index_mapper.index(rtxn, index_name)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
}
|
||||
|
||||
let index_tasks = self.queue.tasks.index_tasks(rtxn, index_name)? & enqueued;
|
||||
|
||||
// If autobatching is disabled we only take one task at a time.
|
||||
// Otherwise, we take only a maximum of tasks to create batches.
|
||||
let tasks_limit = if self.scheduler.autobatching_enabled {
|
||||
self.scheduler.max_number_of_batched_tasks
|
||||
} else {
|
||||
1
|
||||
};
|
||||
|
||||
let mut enqueued = Vec::new();
|
||||
let mut total_size: u64 = 0;
|
||||
for task_id in index_tasks.into_iter().take(tasks_limit) {
|
||||
let task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(rtxn, task_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
|
||||
|
||||
if let Some(uuid) = task.content_uuid() {
|
||||
let content_size = self.queue.file_store.compute_size(uuid)?;
|
||||
total_size = total_size.saturating_add(content_size);
|
||||
}
|
||||
|
||||
if total_size > self.scheduler.batched_tasks_size_limit && !enqueued.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
enqueued.push((task.uid, task.kind));
|
||||
}
|
||||
|
||||
if let Some((batchkind, create_index)) =
|
||||
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
|
||||
{
|
||||
return Ok(self
|
||||
.create_next_batch_index(
|
||||
rtxn,
|
||||
index_name.to_string(),
|
||||
batchkind,
|
||||
&mut current_batch,
|
||||
create_index,
|
||||
)?
|
||||
.map(|batch| (batch, current_batch)));
|
||||
}
|
||||
|
||||
// If we found no tasks then we were notified for something that got autobatched
|
||||
// somehow and there is nothing to do.
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
@@ -1,453 +0,0 @@
|
||||
mod autobatcher;
|
||||
#[cfg(test)]
|
||||
mod autobatcher_test;
|
||||
mod create_batch;
|
||||
mod process_batch;
|
||||
mod process_dump_creation;
|
||||
mod process_index_operation;
|
||||
mod process_snapshot_creation;
|
||||
mod process_upgrade;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
#[cfg(test)]
|
||||
mod test_document_addition;
|
||||
#[cfg(test)]
|
||||
mod test_embedders;
|
||||
#[cfg(test)]
|
||||
mod test_failure;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use convert_case::{Case, Casing as _};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::heed::{Env, WithoutTls};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::Status;
|
||||
use process_batch::ProcessBatchInfo;
|
||||
use rayon::current_num_threads;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use roaring::RoaringBitmap;
|
||||
use synchronoise::SignalEvent;
|
||||
|
||||
use crate::processing::{AtomicTaskStep, BatchProgress};
|
||||
use crate::{Error, IndexScheduler, IndexSchedulerOptions, Result, TickOutcome};
|
||||
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct MustStopProcessing(Arc<AtomicBool>);
|
||||
|
||||
impl MustStopProcessing {
|
||||
pub fn get(&self) -> bool {
|
||||
self.0.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn must_stop(&self) {
|
||||
self.0.store(true, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn reset(&self) {
|
||||
self.0.store(false, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Scheduler {
|
||||
/// A boolean that can be set to true to stop the currently processing tasks.
|
||||
pub must_stop_processing: MustStopProcessing,
|
||||
|
||||
/// Get a signal when a batch needs to be processed.
|
||||
pub(crate) wake_up: Arc<SignalEvent>,
|
||||
|
||||
/// Whether auto-batching is enabled or not.
|
||||
pub(crate) autobatching_enabled: bool,
|
||||
|
||||
/// The maximum number of tasks that will be batched together.
|
||||
pub(crate) max_number_of_batched_tasks: usize,
|
||||
|
||||
/// The maximum size, in bytes, of tasks in a batch.
|
||||
pub(crate) batched_tasks_size_limit: u64,
|
||||
|
||||
/// The path used to create the dumps.
|
||||
pub(crate) dumps_path: PathBuf,
|
||||
|
||||
/// The path used to create the snapshots.
|
||||
pub(crate) snapshots_path: PathBuf,
|
||||
|
||||
/// The path to the folder containing the auth LMDB env.
|
||||
pub(crate) auth_env: Env<WithoutTls>,
|
||||
|
||||
/// The path to the version file of Meilisearch.
|
||||
pub(crate) version_file_path: PathBuf,
|
||||
|
||||
/// The maximal number of entries in the search query cache of an embedder.
|
||||
///
|
||||
/// 0 disables the cache.
|
||||
pub(crate) embedding_cache_cap: usize,
|
||||
}
|
||||
|
||||
impl Scheduler {
|
||||
pub(crate) fn private_clone(&self) -> Scheduler {
|
||||
Scheduler {
|
||||
must_stop_processing: self.must_stop_processing.clone(),
|
||||
wake_up: self.wake_up.clone(),
|
||||
autobatching_enabled: self.autobatching_enabled,
|
||||
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: self.batched_tasks_size_limit,
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
snapshots_path: self.snapshots_path.clone(),
|
||||
auth_env: self.auth_env.clone(),
|
||||
version_file_path: self.version_file_path.clone(),
|
||||
embedding_cache_cap: self.embedding_cache_cap,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
|
||||
Scheduler {
|
||||
must_stop_processing: MustStopProcessing::default(),
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||
autobatching_enabled: options.autobatching_enabled,
|
||||
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: options.batched_tasks_size_limit,
|
||||
dumps_path: options.dumps_path.clone(),
|
||||
snapshots_path: options.snapshots_path.clone(),
|
||||
auth_env,
|
||||
version_file_path: options.version_file_path.clone(),
|
||||
embedding_cache_cap: options.embedding_cache_cap,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Perform one iteration of the run loop.
|
||||
///
|
||||
/// 1. See if we need to cleanup the task queue
|
||||
/// 2. Find the next batch of tasks to be processed.
|
||||
/// 3. Update the information of these tasks following the start of their processing.
|
||||
/// 4. Update the in-memory list of processed tasks accordingly.
|
||||
/// 5. Process the batch:
|
||||
/// - perform the actions of each batched task
|
||||
/// - update the information of each batched task following the end
|
||||
/// of their processing.
|
||||
/// 6. Reset the in-memory list of processed tasks.
|
||||
///
|
||||
/// Returns the number of processed tasks.
|
||||
pub(crate) fn tick(&self) -> Result<TickOutcome> {
|
||||
#[cfg(test)]
|
||||
{
|
||||
*self.run_loop_iteration.write().unwrap() += 1;
|
||||
self.breakpoint(crate::test_utils::Breakpoint::Start);
|
||||
}
|
||||
|
||||
if self.cleanup_enabled {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.queue.cleanup_task_queue(&mut wtxn)?;
|
||||
wtxn.commit()?;
|
||||
}
|
||||
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
let (batch, mut processing_batch) =
|
||||
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
|
||||
Some(batch) => batch,
|
||||
None => return Ok(TickOutcome::WaitForSignal),
|
||||
};
|
||||
let index_uid = batch.index_uid().map(ToOwned::to_owned);
|
||||
drop(rtxn);
|
||||
|
||||
// 1. store the starting date with the bitmap of processing tasks.
|
||||
let mut ids = batch.ids();
|
||||
let processed_tasks = ids.len();
|
||||
|
||||
// We reset the must_stop flag to be sure that we don't stop processing tasks
|
||||
self.scheduler.must_stop_processing.reset();
|
||||
let progress = self
|
||||
.processing_tasks
|
||||
.write()
|
||||
.unwrap()
|
||||
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
|
||||
.start_processing(processing_batch.clone(), ids.clone());
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::BatchCreated);
|
||||
|
||||
// 2. Process the tasks
|
||||
let res = {
|
||||
let cloned_index_scheduler = self.private_clone();
|
||||
let processing_batch = &mut processing_batch;
|
||||
let progress = progress.clone();
|
||||
std::thread::scope(|s| {
|
||||
let p = progress.clone();
|
||||
let handle = std::thread::Builder::new()
|
||||
.name(String::from("batch-operation"))
|
||||
.spawn_scoped(s, move || {
|
||||
cloned_index_scheduler.process_batch(batch, processing_batch, p)
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
match handle.join() {
|
||||
Ok(ret) => {
|
||||
if ret.is_err() {
|
||||
if let Ok(progress_view) =
|
||||
serde_json::to_string(&progress.as_progress_view())
|
||||
{
|
||||
tracing::warn!("Batch failed while doing: {progress_view}")
|
||||
}
|
||||
}
|
||||
ret
|
||||
}
|
||||
Err(panic) => {
|
||||
if let Ok(progress_view) =
|
||||
serde_json::to_string(&progress.as_progress_view())
|
||||
{
|
||||
tracing::warn!("Batch failed while doing: {progress_view}")
|
||||
}
|
||||
let msg = match panic.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match panic.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
Err(Error::ProcessBatchPanicked(msg.to_string()))
|
||||
}
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
// Reset the currently updating index to relinquish the index handle
|
||||
self.index_mapper.set_currently_updating_index(None);
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?;
|
||||
|
||||
progress.update_progress(BatchProgress::WritingTasksToDisk);
|
||||
processing_batch.finished();
|
||||
let mut stop_scheduler_forever = false;
|
||||
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
||||
let mut canceled = RoaringBitmap::new();
|
||||
let mut process_batch_info = ProcessBatchInfo::default();
|
||||
|
||||
match res {
|
||||
Ok((tasks, info)) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);
|
||||
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
|
||||
progress.update_progress(task_progress_obj);
|
||||
process_batch_info = info;
|
||||
let mut success = 0;
|
||||
let mut failure = 0;
|
||||
let mut canceled_by = None;
|
||||
|
||||
#[allow(unused_variables)]
|
||||
for (i, mut task) in tasks.into_iter().enumerate() {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
processing_batch.update(&mut task);
|
||||
if task.status == Status::Canceled {
|
||||
canceled.insert(task.uid);
|
||||
canceled_by = task.canceled_by;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(
|
||||
crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchSuccess {
|
||||
task_uid: i as u32,
|
||||
},
|
||||
)?;
|
||||
|
||||
match task.error {
|
||||
Some(_) => failure += 1,
|
||||
None => success += 1,
|
||||
}
|
||||
|
||||
self.queue
|
||||
.tasks
|
||||
.update_task(&mut wtxn, &task)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
|
||||
}
|
||||
if let Some(canceled_by) = canceled_by {
|
||||
self.queue.tasks.canceled_by.put(&mut wtxn, &canceled_by, &canceled)?;
|
||||
}
|
||||
tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks.");
|
||||
}
|
||||
// If we have an abortion error we must stop the tick here and re-schedule tasks.
|
||||
Err(Error::Milli {
|
||||
error: milli::Error::InternalError(milli::InternalError::AbortedIndexation),
|
||||
..
|
||||
})
|
||||
| Err(Error::AbortedTask) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::AbortedIndexation);
|
||||
wtxn.abort();
|
||||
|
||||
tracing::info!("A batch of tasks was aborted.");
|
||||
// We make sure that we don't call `stop_processing` on the `processing_tasks`,
|
||||
// this is because we want to let the next tick call `create_next_batch` and keep
|
||||
// the `started_at` date times and `processings` of the current processing tasks.
|
||||
// This date time is used by the task cancelation to store the right `started_at`
|
||||
// date in the task on disk.
|
||||
return Ok(TickOutcome::TickAgain(0));
|
||||
}
|
||||
// If an index said it was full, we need to:
|
||||
// 1. identify which index is full
|
||||
// 2. close the associated environment
|
||||
// 3. resize it
|
||||
// 4. re-schedule tasks
|
||||
Err(Error::Milli {
|
||||
error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached),
|
||||
..
|
||||
}) if index_uid.is_some() => {
|
||||
// fixme: add index_uid to match to avoid the unwrap
|
||||
let index_uid = index_uid.unwrap();
|
||||
// fixme: handle error more gracefully? not sure when this could happen
|
||||
self.index_mapper.resize_index(&wtxn, &index_uid)?;
|
||||
wtxn.abort();
|
||||
|
||||
tracing::info!("The max database size was reached. Resizing the index.");
|
||||
|
||||
return Ok(TickOutcome::TickAgain(0));
|
||||
}
|
||||
// In case of a failure we must get back and patch all the tasks with the error.
|
||||
Err(err) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed);
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
|
||||
progress.update_progress(task_progress_obj);
|
||||
|
||||
if matches!(err, Error::DatabaseUpgrade(_)) {
|
||||
tracing::error!(
|
||||
"Upgrade task failed, tasks won't be processed until the following issue is fixed: {err}"
|
||||
);
|
||||
stop_scheduler_forever = true;
|
||||
}
|
||||
let error: ResponseError = err.into();
|
||||
for id in ids.iter() {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
let mut task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&wtxn, id)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(error.clone());
|
||||
task.details = task.details.map(|d| d.to_failed());
|
||||
processing_batch.update(&mut task);
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(
|
||||
crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchFailure,
|
||||
)?;
|
||||
|
||||
tracing::error!("Batch failed {}", error);
|
||||
|
||||
self.queue
|
||||
.tasks
|
||||
.update_task(&mut wtxn, &task)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We must re-add the canceled task so they're part of the same batch.
|
||||
ids |= canceled;
|
||||
|
||||
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
|
||||
process_batch_info;
|
||||
|
||||
processing_batch.stats.progress_trace =
|
||||
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
|
||||
processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
|
||||
let mut congestion_info = serde_json::Map::new();
|
||||
congestion_info.insert("attempts".into(), congestion.attempts.into());
|
||||
congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
|
||||
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
|
||||
congestion_info
|
||||
});
|
||||
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
|
||||
.iter()
|
||||
.flat_map(|(dbname, pre_size)| {
|
||||
post_commit_dabases_sizes
|
||||
.get(dbname)
|
||||
.map(|post_size| {
|
||||
use byte_unit::{Byte, UnitType::Binary};
|
||||
use std::cmp::Ordering::{Equal, Greater, Less};
|
||||
|
||||
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
|
||||
let diff_size = post_size.abs_diff(*pre_size) as u64;
|
||||
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
|
||||
let sign = match post_size.cmp(pre_size) {
|
||||
Equal => return None,
|
||||
Greater => "+",
|
||||
Less => "-",
|
||||
};
|
||||
|
||||
Some((
|
||||
dbname.to_case(Case::Camel),
|
||||
format!("{post:#.2} ({sign}{diff:#.2})").into(),
|
||||
))
|
||||
})
|
||||
.into_iter()
|
||||
.flatten()
|
||||
})
|
||||
.collect();
|
||||
|
||||
if let Some(congestion) = congestion {
|
||||
tracing::debug!(
|
||||
"Channel congestion metrics - Attempts: {}, Blocked attempts: {} ({:.1}% congestion)",
|
||||
congestion.attempts,
|
||||
congestion.blocking_attempts,
|
||||
congestion.congestion_ratio(),
|
||||
);
|
||||
}
|
||||
|
||||
tracing::debug!("call trace: {:?}", progress.accumulated_durations());
|
||||
|
||||
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?;
|
||||
|
||||
wtxn.commit().map_err(Error::HeedTransaction)?;
|
||||
|
||||
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
|
||||
// and then become « not found » for some time until the commit everything is written and the final commit is made.
|
||||
self.processing_tasks.write().unwrap().stop_processing();
|
||||
|
||||
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
|
||||
tracing::debug!("Deleting the update files");
|
||||
|
||||
//We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap
|
||||
let idx = AtomicU32::new(0);
|
||||
(0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> {
|
||||
let rtxn = self.read_txn()?;
|
||||
while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) {
|
||||
let task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&rtxn, id)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
|
||||
tracing::error!(
|
||||
"Failure to delete the content files associated with task {}. Error: {e}",
|
||||
task.uid
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// We shouldn't crash the tick function if we can't send data to the webhook.
|
||||
let _ = self.notify_webhook(&ids);
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);
|
||||
|
||||
if stop_scheduler_forever {
|
||||
Ok(TickOutcome::StopProcessingForever)
|
||||
} else {
|
||||
Ok(TickOutcome::TickAgain(processed_tasks))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,687 +0,0 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::{self, ChannelCongestion};
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task};
|
||||
use milli::update::Settings as MilliSettings;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::create_batch::Batch;
|
||||
use crate::processing::{
|
||||
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
|
||||
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
|
||||
UpdateIndexProgress,
|
||||
};
|
||||
use crate::utils::{
|
||||
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
|
||||
ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result, TaskId};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ProcessBatchInfo {
|
||||
/// The write channel congestion. None when unavailable: settings update.
|
||||
pub congestion: Option<ChannelCongestion>,
|
||||
/// The sizes of the different databases before starting the indexation.
|
||||
pub pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
|
||||
/// The sizes of the different databases after commiting the indexation.
|
||||
pub post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Apply the operation associated with the given batch.
|
||||
///
|
||||
/// ## Return
|
||||
/// The list of tasks that were processed. The metadata of each task in the returned
|
||||
/// list is updated accordingly, with the exception of the its date fields
|
||||
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
|
||||
#[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))]
|
||||
pub(crate) fn process_batch(
|
||||
&self,
|
||||
batch: Batch,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
progress: Progress,
|
||||
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
|
||||
#[cfg(test)]
|
||||
{
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::PanicInsideProcessBatch)?;
|
||||
self.breakpoint(crate::test_utils::Breakpoint::InsideProcessBatch);
|
||||
}
|
||||
|
||||
match batch {
|
||||
Batch::TaskCancelation { mut task } => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
let matched_tasks =
|
||||
if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind {
|
||||
tasks
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let mut canceled_tasks = self.cancel_matched_tasks(
|
||||
&rtxn,
|
||||
task.uid,
|
||||
current_batch,
|
||||
matched_tasks,
|
||||
&progress,
|
||||
)?;
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
match &mut task.details {
|
||||
Some(Details::TaskCancelation {
|
||||
matched_tasks: _,
|
||||
canceled_tasks: canceled_tasks_details,
|
||||
original_filter: _,
|
||||
}) => {
|
||||
*canceled_tasks_details = Some(canceled_tasks.len() as u64);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
canceled_tasks.push(task);
|
||||
|
||||
Ok((canceled_tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::TaskDeletions(mut tasks) => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
let mut matched_tasks = RoaringBitmap::new();
|
||||
|
||||
for task in tasks.iter() {
|
||||
if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
|
||||
matched_tasks |= tasks;
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let mut deleted_tasks =
|
||||
self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Succeeded;
|
||||
let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
|
||||
deleted_tasks -= tasks;
|
||||
|
||||
match &mut task.details {
|
||||
Some(Details::TaskDeletion {
|
||||
matched_tasks: _,
|
||||
deleted_tasks,
|
||||
original_filter: _,
|
||||
}) => {
|
||||
*deleted_tasks = Some(deleted_tasks_count);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Ok((tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::SnapshotCreation(tasks) => self
|
||||
.process_snapshot(progress, tasks)
|
||||
.map(|tasks| (tasks, ProcessBatchInfo::default())),
|
||||
Batch::Dump(task) => self
|
||||
.process_dump_creation(progress, task)
|
||||
.map(|tasks| (tasks, ProcessBatchInfo::default())),
|
||||
Batch::IndexOperation { op, must_create_index } => {
|
||||
let index_uid = op.index_uid().to_string();
|
||||
let index = if must_create_index {
|
||||
// create the index if it doesn't already exist
|
||||
let wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.create_index(wtxn, &index_uid, None)?
|
||||
} else {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, &index_uid)?
|
||||
};
|
||||
|
||||
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
|
||||
self.index_mapper
|
||||
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
|
||||
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
|
||||
let (tasks, congestion) =
|
||||
self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;
|
||||
|
||||
{
|
||||
progress.update_progress(FinalizingIndexStep::Committing);
|
||||
let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
|
||||
let _entered = span.enter();
|
||||
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
|
||||
// if the update processed successfully, we're going to store the new
|
||||
// stats of the index. Since the tasks have already been processed and
|
||||
// this is a non-critical operation. If it fails, we should not fail
|
||||
// the entire batch.
|
||||
let mut post_commit_dabases_sizes = None;
|
||||
let res = || -> Result<()> {
|
||||
progress.update_progress(FinalizingIndexStep::ComputingStats);
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
|
||||
post_commit_dabases_sizes = Some(index.database_sizes(&index_rtxn)?);
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}();
|
||||
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
),
|
||||
}
|
||||
|
||||
let info = ProcessBatchInfo {
|
||||
congestion,
|
||||
// In case we fail to the get post-commit sizes we decide
|
||||
// that nothing changed and use the pre-commit sizes.
|
||||
post_commit_dabases_sizes: post_commit_dabases_sizes
|
||||
.unwrap_or_else(|| pre_commit_dabases_sizes.clone()),
|
||||
pre_commit_dabases_sizes,
|
||||
};
|
||||
|
||||
Ok((tasks, info))
|
||||
}
|
||||
Batch::IndexCreation { index_uid, primary_key, task } => {
|
||||
progress.update_progress(CreateIndexProgress::CreatingTheIndex);
|
||||
|
||||
let wtxn = self.env.write_txn()?;
|
||||
if self.index_mapper.exists(&wtxn, &index_uid)? {
|
||||
return Err(Error::IndexAlreadyExists(index_uid));
|
||||
}
|
||||
self.index_mapper.create_index(wtxn, &index_uid, None)?;
|
||||
|
||||
self.process_batch(
|
||||
Batch::IndexUpdate { index_uid, primary_key, task },
|
||||
current_batch,
|
||||
progress,
|
||||
)
|
||||
}
|
||||
Batch::IndexUpdate { index_uid, primary_key, mut task } => {
|
||||
progress.update_progress(UpdateIndexProgress::UpdatingTheIndex);
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let index = self.index_mapper.index(&rtxn, &index_uid)?;
|
||||
|
||||
if let Some(primary_key) = primary_key.clone() {
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let mut builder = MilliSettings::new(
|
||||
&mut index_wtxn,
|
||||
&index,
|
||||
self.index_mapper.indexer_config(),
|
||||
);
|
||||
builder.set_primary_key(primary_key);
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
|
||||
// drop rtxn before starting a new wtxn on the same db
|
||||
rtxn.commit()?;
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::IndexInfo { primary_key });
|
||||
|
||||
// if the update processed successfully, we're going to store the new
|
||||
// stats of the index. Since the tasks have already been processed and
|
||||
// this is a non-critical operation. If it fails, we should not fail
|
||||
// the entire batch.
|
||||
let res = || -> Result<()> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}();
|
||||
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
),
|
||||
}
|
||||
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
|
||||
progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
|
||||
let wtxn = self.env.write_txn()?;
|
||||
|
||||
// it's possible that the index doesn't exist
|
||||
let number_of_documents = || -> Result<u64> {
|
||||
let index = self.index_mapper.index(&wtxn, &index_uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
index
|
||||
.number_of_documents(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))
|
||||
}()
|
||||
.unwrap_or_default();
|
||||
|
||||
// The write transaction is directly owned and committed inside.
|
||||
match self.index_mapper.delete_index(wtxn, &index_uid) {
|
||||
Ok(()) => (),
|
||||
Err(Error::IndexNotFound(_)) if index_has_been_created => (),
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
|
||||
// We set all the tasks details to the default value.
|
||||
for task in &mut tasks {
|
||||
task.status = Status::Succeeded;
|
||||
task.details = match &task.kind {
|
||||
KindWithContent::IndexDeletion { .. } => {
|
||||
Some(Details::ClearAll { deleted_documents: Some(number_of_documents) })
|
||||
}
|
||||
otherwise => otherwise.default_finished_details(),
|
||||
};
|
||||
}
|
||||
|
||||
// Here we could also show that all the internal database sizes goes to 0
|
||||
// but it would mean opening the index and that's costly.
|
||||
Ok((tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::IndexSwap { mut task } => {
|
||||
progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
|
||||
swaps
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
let mut not_found_indexes = BTreeSet::new();
|
||||
for IndexSwap { indexes: (lhs, rhs) } in swaps {
|
||||
for index in [lhs, rhs] {
|
||||
let index_exists = self.index_mapper.index_exists(&wtxn, index)?;
|
||||
if !index_exists {
|
||||
not_found_indexes.insert(index);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !not_found_indexes.is_empty() {
|
||||
if not_found_indexes.len() == 1 {
|
||||
return Err(Error::SwapIndexNotFound(
|
||||
not_found_indexes.into_iter().next().unwrap().clone(),
|
||||
));
|
||||
} else {
|
||||
return Err(Error::SwapIndexesNotFound(
|
||||
not_found_indexes.into_iter().cloned().collect(),
|
||||
));
|
||||
}
|
||||
}
|
||||
progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
|
||||
for (step, swap) in swaps.iter().enumerate() {
|
||||
progress.update_progress(VariableNameStep::<SwappingTheIndexes>::new(
|
||||
format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
|
||||
step as u32,
|
||||
swaps.len() as u32,
|
||||
));
|
||||
self.apply_index_swap(
|
||||
&mut wtxn,
|
||||
&progress,
|
||||
task.uid,
|
||||
&swap.indexes.0,
|
||||
&swap.indexes.1,
|
||||
)?;
|
||||
}
|
||||
wtxn.commit()?;
|
||||
task.status = Status::Succeeded;
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::UpgradeDatabase { mut tasks } => {
|
||||
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
|
||||
unreachable!();
|
||||
};
|
||||
let ret = catch_unwind(AssertUnwindSafe(|| self.process_upgrade(from, progress)));
|
||||
match ret {
|
||||
Ok(Ok(())) => (),
|
||||
Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))),
|
||||
Err(e) => {
|
||||
let msg = match e.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match e.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked(
|
||||
msg.to_string(),
|
||||
))));
|
||||
}
|
||||
}
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Succeeded;
|
||||
// Since this task can be retried we must reset its error status
|
||||
task.error = None;
|
||||
}
|
||||
|
||||
Ok((tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Swap the index `lhs` with the index `rhs`.
|
||||
fn apply_index_swap(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
progress: &Progress,
|
||||
task_id: u32,
|
||||
lhs: &str,
|
||||
rhs: &str,
|
||||
) -> Result<()> {
|
||||
progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks);
|
||||
// 1. Verify that both lhs and rhs are existing indexes
|
||||
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
|
||||
if !index_lhs_exists {
|
||||
return Err(Error::IndexNotFound(lhs.to_owned()));
|
||||
}
|
||||
let index_rhs_exists = self.index_mapper.index_exists(wtxn, rhs)?;
|
||||
if !index_rhs_exists {
|
||||
return Err(Error::IndexNotFound(rhs.to_owned()));
|
||||
}
|
||||
|
||||
// 2. Get the task set for index = name that appeared before the index swap task
|
||||
let mut index_lhs_task_ids = self.queue.tasks.index_tasks(wtxn, lhs)?;
|
||||
index_lhs_task_ids.remove_range(task_id..);
|
||||
let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
|
||||
index_rhs_task_ids.remove_range(task_id..);
|
||||
|
||||
// 3. before_name -> new_name in the task's KindWithContent
|
||||
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
|
||||
let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
|
||||
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
|
||||
progress.update_progress(task_progress);
|
||||
|
||||
for task_id in tasks_to_update {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
swap_index_uid_in_task(&mut task, (lhs, rhs));
|
||||
self.queue.tasks.all_tasks.put(wtxn, &task_id, &task)?;
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 4. remove the task from indexuid = before_name
|
||||
// 5. add the task to indexuid = after_name
|
||||
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata);
|
||||
self.queue.tasks.update_index(wtxn, lhs, |lhs_tasks| {
|
||||
*lhs_tasks -= &index_lhs_task_ids;
|
||||
*lhs_tasks |= &index_rhs_task_ids;
|
||||
})?;
|
||||
self.queue.tasks.update_index(wtxn, rhs, |rhs_tasks| {
|
||||
*rhs_tasks -= &index_rhs_task_ids;
|
||||
*rhs_tasks |= &index_lhs_task_ids;
|
||||
})?;
|
||||
|
||||
// 6. Swap in the index mapper
|
||||
self.index_mapper.swap(wtxn, lhs, rhs)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete each given task from all the databases (if it is deleteable).
|
||||
///
|
||||
/// Return the number of tasks that were actually deleted.
|
||||
fn delete_matched_tasks(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
matched_tasks: &RoaringBitmap,
|
||||
progress: &Progress,
|
||||
) -> Result<RoaringBitmap> {
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime);
|
||||
|
||||
// 1. Remove from this list the tasks that we are not allowed to delete
|
||||
let enqueued_tasks = self.queue.tasks.get_status(wtxn, Status::Enqueued)?;
|
||||
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
|
||||
|
||||
let all_task_ids = self.queue.tasks.all_task_ids(wtxn)?;
|
||||
let mut to_delete_tasks = all_task_ids & matched_tasks;
|
||||
to_delete_tasks -= &**processing_tasks;
|
||||
to_delete_tasks -= &enqueued_tasks;
|
||||
|
||||
// 2. We now have a list of tasks to delete, delete them
|
||||
let mut affected_indexes = HashSet::new();
|
||||
let mut affected_statuses = HashSet::new();
|
||||
let mut affected_kinds = HashSet::new();
|
||||
let mut affected_canceled_by = RoaringBitmap::new();
|
||||
// The tasks that have been removed *per batches*.
|
||||
let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();
|
||||
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
|
||||
progress.update_progress(task_progress);
|
||||
for task_id in to_delete_tasks.iter() {
|
||||
let task =
|
||||
self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned()));
|
||||
affected_statuses.insert(task.status);
|
||||
affected_kinds.insert(task.kind.as_kind());
|
||||
// Note: don't delete the persisted task data since
|
||||
// we can only delete succeeded, failed, and canceled tasks.
|
||||
// In each of those cases, the persisted data is supposed to
|
||||
// have been deleted already.
|
||||
utils::remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.tasks.enqueued_at,
|
||||
task.enqueued_at,
|
||||
task.uid,
|
||||
)?;
|
||||
if let Some(started_at) = task.started_at {
|
||||
utils::remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.tasks.started_at,
|
||||
started_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
utils::remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.tasks.finished_at,
|
||||
finished_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(canceled_by) = task.canceled_by {
|
||||
affected_canceled_by.insert(canceled_by);
|
||||
}
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
affected_batches.entry(batch_uid).or_default().insert(task_id);
|
||||
}
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(
|
||||
(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
|
||||
);
|
||||
progress.update_progress(task_progress);
|
||||
for index in affected_indexes.iter() {
|
||||
self.queue.tasks.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
for status in affected_statuses.iter() {
|
||||
self.queue.tasks.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
for kind in affected_kinds.iter() {
|
||||
self.queue.tasks.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasks);
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
|
||||
progress.update_progress(task_progress);
|
||||
for task in to_delete_tasks.iter() {
|
||||
self.queue.tasks.all_tasks.delete(wtxn, &task)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
for canceled_by in affected_canceled_by {
|
||||
if let Some(mut tasks) = self.queue.tasks.canceled_by.get(wtxn, &canceled_by)? {
|
||||
tasks -= &to_delete_tasks;
|
||||
if tasks.is_empty() {
|
||||
self.queue.tasks.canceled_by.delete(wtxn, &canceled_by)?;
|
||||
} else {
|
||||
self.queue.tasks.canceled_by.put(wtxn, &canceled_by, &tasks)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
progress.update_progress(TaskDeletionProgress::DeletingBatches);
|
||||
let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
|
||||
progress.update_progress(batch_progress);
|
||||
for (batch_id, to_delete_tasks) in affected_batches {
|
||||
if let Some(mut tasks) = self.queue.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
|
||||
tasks -= &to_delete_tasks;
|
||||
// We must remove the batch entirely
|
||||
if tasks.is_empty() {
|
||||
if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? {
|
||||
if let Some(BatchEnqueuedAt { earliest, oldest }) = batch.enqueued_at {
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
earliest,
|
||||
batch_id,
|
||||
)?;
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
oldest,
|
||||
batch_id,
|
||||
)?;
|
||||
} else {
|
||||
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
|
||||
// and we still need to find the date by scrolling the database
|
||||
remove_n_tasks_datetime_earlier_than(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
batch.started_at,
|
||||
batch.stats.total_nb_tasks.clamp(1, 2) as usize,
|
||||
batch_id,
|
||||
)?;
|
||||
}
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.started_at,
|
||||
batch.started_at,
|
||||
batch_id,
|
||||
)?;
|
||||
if let Some(finished_at) = batch.finished_at {
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.finished_at,
|
||||
finished_at,
|
||||
batch_id,
|
||||
)?;
|
||||
}
|
||||
|
||||
self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
|
||||
self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Anyway, we must remove the batch from all its reverse indexes.
|
||||
// The only way to do that is to check
|
||||
|
||||
for index in affected_indexes.iter() {
|
||||
let index_tasks = self.queue.tasks.index_tasks(wtxn, index)?;
|
||||
let remaining_index_tasks = index_tasks & &tasks;
|
||||
if remaining_index_tasks.is_empty() {
|
||||
self.queue.batches.update_index(wtxn, index, |bitmap| {
|
||||
bitmap.remove(batch_id);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
for status in affected_statuses.iter() {
|
||||
let status_tasks = self.queue.tasks.get_status(wtxn, *status)?;
|
||||
let remaining_status_tasks = status_tasks & &tasks;
|
||||
if remaining_status_tasks.is_empty() {
|
||||
self.queue.batches.update_status(wtxn, *status, |bitmap| {
|
||||
bitmap.remove(batch_id);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
for kind in affected_kinds.iter() {
|
||||
let kind_tasks = self.queue.tasks.get_kind(wtxn, *kind)?;
|
||||
let remaining_kind_tasks = kind_tasks & &tasks;
|
||||
if remaining_kind_tasks.is_empty() {
|
||||
self.queue.batches.update_kind(wtxn, *kind, |bitmap| {
|
||||
bitmap.remove(batch_id);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
}
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
Ok(to_delete_tasks)
|
||||
}
|
||||
|
||||
/// Cancel each given task from all the databases (if it is cancelable).
|
||||
///
|
||||
/// Returns the list of tasks that matched the filter and must be written in the database.
|
||||
fn cancel_matched_tasks(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
cancel_task_id: TaskId,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
matched_tasks: &RoaringBitmap,
|
||||
progress: &Progress,
|
||||
) -> Result<Vec<Task>> {
|
||||
progress.update_progress(TaskCancelationProgress::RetrievingTasks);
|
||||
|
||||
// 1. Remove from this list the tasks that we are not allowed to cancel
|
||||
// Notice that only the _enqueued_ ones are cancelable and we should
|
||||
// have already aborted the indexation of the _processing_ ones
|
||||
let cancelable_tasks = self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
|
||||
let tasks_to_cancel = cancelable_tasks & matched_tasks;
|
||||
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
progress.update_progress(progress_obj);
|
||||
|
||||
// 2. We now have a list of tasks to cancel, cancel them
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(
|
||||
rtxn,
|
||||
tasks_to_cancel.iter().inspect(|_| {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}),
|
||||
)?;
|
||||
|
||||
progress.update_progress(TaskCancelationProgress::UpdatingTasks);
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
progress.update_progress(progress_obj);
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Canceled;
|
||||
task.canceled_by = Some(cancel_task_id);
|
||||
task.details = task.details.as_ref().map(|d| d.to_failed());
|
||||
current_batch.processing(Some(task));
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
}
|
||||
@@ -1,278 +0,0 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufWriter;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use dump::IndexMetadata;
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use meilisearch_types::milli::{self};
|
||||
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
||||
use time::macros::format_description;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::processing::{
|
||||
AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress,
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(super) fn process_dump_creation(
|
||||
&self,
|
||||
progress: Progress,
|
||||
mut task: Task,
|
||||
) -> Result<Vec<Task>> {
|
||||
progress.update_progress(DumpCreationProgress::StartTheDumpCreation);
|
||||
let started_at = OffsetDateTime::now_utc();
|
||||
let (keys, instance_uid) =
|
||||
if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
|
||||
(keys, instance_uid)
|
||||
} else {
|
||||
unreachable!();
|
||||
};
|
||||
let dump = dump::DumpWriter::new(*instance_uid)?;
|
||||
|
||||
// 1. dump the keys
|
||||
progress.update_progress(DumpCreationProgress::DumpTheApiKeys);
|
||||
let mut dump_keys = dump.create_keys()?;
|
||||
for key in keys {
|
||||
dump_keys.push_key(key)?;
|
||||
}
|
||||
dump_keys.flush()?;
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
// 2. dump the tasks
|
||||
progress.update_progress(DumpCreationProgress::DumpTheTasks);
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
|
||||
let (atomic, update_task_progress) =
|
||||
AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u32);
|
||||
progress.update_progress(update_task_progress);
|
||||
|
||||
for ret in self.queue.tasks.all_tasks.iter(&rtxn)? {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (_, mut t) = ret?;
|
||||
let status = t.status;
|
||||
let content_file = t.content_uuid();
|
||||
|
||||
// In the case we're dumping ourselves we want to be marked as finished
|
||||
// to not loop over ourselves indefinitely.
|
||||
if t.uid == task.uid {
|
||||
let finished_at = OffsetDateTime::now_utc();
|
||||
|
||||
// We're going to fake the date because we don't know if everything is going to go well.
|
||||
// But we need to dump the task as finished and successful.
|
||||
// If something fail everything will be set appropriately in the end.
|
||||
t.status = Status::Succeeded;
|
||||
t.started_at = Some(started_at);
|
||||
t.finished_at = Some(finished_at);
|
||||
}
|
||||
|
||||
// Patch the task to remove the batch uid, because as of v1.12.5 batches are not persisted.
|
||||
// This prevent from referencing *future* batches not actually associated with the task.
|
||||
//
|
||||
// See <https://github.com/meilisearch/meilisearch/issues/5247> for details.
|
||||
t.batch_uid = None;
|
||||
|
||||
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
|
||||
|
||||
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file) = content_file {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
if status == Status::Enqueued {
|
||||
let content_file = self.queue.file_store.get_update(content_file)?;
|
||||
|
||||
for document in
|
||||
serde_json::de::Deserializer::from_reader(content_file).into_iter()
|
||||
{
|
||||
let document = document.map_err(|e| {
|
||||
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
|
||||
})?;
|
||||
dump_content_file.push_document(&document)?;
|
||||
}
|
||||
|
||||
dump_content_file.flush()?;
|
||||
}
|
||||
}
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
dump_tasks.flush()?;
|
||||
|
||||
// 3. dump the batches
|
||||
progress.update_progress(DumpCreationProgress::DumpTheBatches);
|
||||
let mut dump_batches = dump.create_batches_queue()?;
|
||||
|
||||
let (atomic_batch_progress, update_batch_progress) =
|
||||
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32);
|
||||
progress.update_progress(update_batch_progress);
|
||||
|
||||
for ret in self.queue.batches.all_batches.iter(&rtxn)? {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (_, mut b) = ret?;
|
||||
// In the case we're dumping ourselves we want to be marked as finished
|
||||
// to not loop over ourselves indefinitely.
|
||||
if b.uid == task.uid {
|
||||
let finished_at = OffsetDateTime::now_utc();
|
||||
|
||||
// We're going to fake the date because we don't know if everything is going to go well.
|
||||
// But we need to dump the task as finished and successful.
|
||||
// If something fail everything will be set appropriately in the end.
|
||||
let mut statuses = BTreeMap::new();
|
||||
statuses.insert(Status::Succeeded, b.stats.total_nb_tasks);
|
||||
b.stats.status = statuses;
|
||||
b.finished_at = Some(finished_at);
|
||||
}
|
||||
|
||||
dump_batches.push_batch(&b)?;
|
||||
atomic_batch_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
dump_batches.flush()?;
|
||||
|
||||
// 4. Dump the indexes
|
||||
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
|
||||
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
|
||||
let mut count = 0;
|
||||
let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
|
||||
progress.update_progress(VariableNameStep::<DumpCreationProgress>::new(
|
||||
uid.to_string(),
|
||||
count,
|
||||
nb_indexes,
|
||||
));
|
||||
count += 1;
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let metadata = IndexMetadata {
|
||||
uid: uid.to_owned(),
|
||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||
created_at: index
|
||||
.created_at(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
|
||||
updated_at: index
|
||||
.updated_at(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
|
||||
};
|
||||
let mut index_dumper = dump.create_index(uid, &metadata)?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index
|
||||
.embedding_configs(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let nb_documents = index
|
||||
.number_of_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
|
||||
as u32;
|
||||
let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
|
||||
progress.update_progress(update_document_progress);
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// 4.1. Dump the documents
|
||||
for ret in documents {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
'inject_vectors: {
|
||||
let embeddings = index
|
||||
.embeddings(&rtxn, id)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
let user_err =
|
||||
milli::Error::UserError(milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
.external_id_of(&rtxn, std::iter::once(id))
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={id}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
});
|
||||
|
||||
return Err(Error::from_milli(user_err, Some(uid.to_string())));
|
||||
};
|
||||
|
||||
for (embedder_name, embeddings) in embeddings {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == embedder_name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
)),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
index_dumper.push_document(&document)?;
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 4.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(
|
||||
index,
|
||||
&rtxn,
|
||||
meilisearch_types::settings::SecretPolicy::RevealSecrets,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
index_dumper.settings(&settings)?;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// 5. Dump experimental feature settings
|
||||
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
|
||||
let features = self.features().runtime_features();
|
||||
dump.create_experimental_features(features)?;
|
||||
let network = self.network();
|
||||
dump.create_network(network)?;
|
||||
|
||||
let dump_uid = started_at.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
)).unwrap();
|
||||
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
progress.update_progress(DumpCreationProgress::CompressTheDump);
|
||||
let path = self.scheduler.dumps_path.join(format!("{}.dump", dump_uid));
|
||||
let file = File::create(path)?;
|
||||
dump.persist_to(BufWriter::new(file))?;
|
||||
|
||||
// if we reached this step we can tell the scheduler we succeeded to dump ourselves.
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::Dump { dump_uid: Some(dump_uid) });
|
||||
Ok(vec![task])
|
||||
}
|
||||
}
|
||||
@@ -1,539 +0,0 @@
|
||||
use bumpalo::collections::CollectIn;
|
||||
use bumpalo::Bump;
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::milli::documents::PrimaryKey;
|
||||
use meilisearch_types::milli::progress::Progress;
|
||||
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
|
||||
use meilisearch_types::milli::update::DocumentAdditionResult;
|
||||
use meilisearch_types::milli::{self, ChannelCongestion, Filter, ThreadPoolNoAbortBuilder};
|
||||
use meilisearch_types::settings::apply_settings_to_builder;
|
||||
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
||||
use meilisearch_types::Index;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::create_batch::{DocumentOperation, IndexOperation};
|
||||
use crate::processing::{
|
||||
DocumentDeletionProgress, DocumentEditionProgress, DocumentOperationProgress, SettingsProgress,
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Process the index operation on the given index.
|
||||
///
|
||||
/// ## Return
|
||||
/// The list of processed tasks.
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip(self, index_wtxn, index, progress),
|
||||
target = "indexing::scheduler"
|
||||
)]
|
||||
pub(crate) fn apply_index_operation<'i>(
|
||||
&self,
|
||||
index_wtxn: &mut RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
operation: IndexOperation,
|
||||
progress: &Progress,
|
||||
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
|
||||
let indexer_alloc = Bump::new();
|
||||
let started_processing_at = std::time::Instant::now();
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
|
||||
match operation {
|
||||
IndexOperation::DocumentClear { index_uid, mut tasks } => {
|
||||
let count = milli::update::ClearDocuments::new(index_wtxn, index)
|
||||
.execute()
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid)))?;
|
||||
|
||||
let mut first_clear_found = false;
|
||||
for task in &mut tasks {
|
||||
task.status = Status::Succeeded;
|
||||
// The first document clear will effectively delete every documents
|
||||
// in the database but the next ones will clear 0 documents.
|
||||
task.details = match &task.kind {
|
||||
KindWithContent::DocumentClear { .. } => {
|
||||
let count = if first_clear_found { 0 } else { count };
|
||||
first_clear_found = true;
|
||||
Some(Details::ClearAll { deleted_documents: Some(count) })
|
||||
}
|
||||
otherwise => otherwise.default_details(),
|
||||
};
|
||||
}
|
||||
|
||||
Ok((tasks, None))
|
||||
}
|
||||
IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
|
||||
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
|
||||
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
|
||||
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
|
||||
// to a fresh thread.
|
||||
let mut content_files = Vec::new();
|
||||
for operation in &operations {
|
||||
match operation {
|
||||
DocumentOperation::Replace(content_uuid)
|
||||
| DocumentOperation::Update(content_uuid) => {
|
||||
let content_file = self.queue.file_store.get_update(*content_uuid)?;
|
||||
let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
|
||||
content_files.push(mmap);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut content_files_iter = content_files.iter();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let embedders = index
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
for operation in operations {
|
||||
match operation {
|
||||
DocumentOperation::Replace(_content_uuid) => {
|
||||
let mmap = content_files_iter.next().unwrap();
|
||||
indexer
|
||||
.replace_documents(mmap)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
}
|
||||
DocumentOperation::Update(_content_uuid) => {
|
||||
let mmap = content_files_iter.next().unwrap();
|
||||
indexer
|
||||
.update_documents(mmap)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
}
|
||||
DocumentOperation::Delete(document_ids) => {
|
||||
let document_ids: bumpalo::collections::vec::Vec<_> = document_ids
|
||||
.iter()
|
||||
.map(|s| &*indexer_alloc.alloc_str(s))
|
||||
.collect_in(&indexer_alloc);
|
||||
indexer.delete_documents(document_ids.into_bump_slice());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
|
||||
progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
|
||||
let (document_changes, operation_stats, primary_key) = indexer
|
||||
.into_changes(
|
||||
&indexer_alloc,
|
||||
index,
|
||||
&rtxn,
|
||||
primary_key.as_deref(),
|
||||
&mut new_fields_ids_map,
|
||||
&|| must_stop_processing.get(),
|
||||
progress.clone(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
|
||||
let mut candidates_count = 0;
|
||||
for (stats, task) in operation_stats.into_iter().zip(&mut tasks) {
|
||||
candidates_count += stats.document_count;
|
||||
match stats.error {
|
||||
Some(error) => {
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(milli::Error::UserError(error).into());
|
||||
}
|
||||
None => task.status = Status::Succeeded,
|
||||
}
|
||||
|
||||
task.details = match task.details {
|
||||
Some(Details::DocumentAdditionOrUpdate { received_documents, .. }) => {
|
||||
Some(Details::DocumentAdditionOrUpdate {
|
||||
received_documents,
|
||||
indexed_documents: Some(stats.document_count),
|
||||
})
|
||||
}
|
||||
Some(Details::DocumentDeletion { provided_ids, .. }) => {
|
||||
Some(Details::DocumentDeletion {
|
||||
provided_ids,
|
||||
deleted_documents: Some(stats.document_count),
|
||||
})
|
||||
}
|
||||
_ => {
|
||||
// In the case of a `documentAdditionOrUpdate` or `DocumentDeletion`
|
||||
// the details MUST be set to either addition or deletion
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
progress.update_progress(DocumentOperationProgress::Indexing);
|
||||
let mut congestion = None;
|
||||
if tasks.iter().any(|res| res.error.is_none()) {
|
||||
congestion = Some(
|
||||
indexer::index(
|
||||
index_wtxn,
|
||||
index,
|
||||
pool,
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
|
||||
);
|
||||
|
||||
let addition = DocumentAdditionResult {
|
||||
indexed_documents: candidates_count,
|
||||
number_of_documents: index
|
||||
.number_of_documents(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
};
|
||||
|
||||
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||
}
|
||||
|
||||
Ok((tasks, congestion))
|
||||
}
|
||||
IndexOperation::DocumentEdition { index_uid, mut task } => {
|
||||
progress.update_progress(DocumentEditionProgress::RetrievingConfig);
|
||||
|
||||
let (filter, code) = if let KindWithContent::DocumentEdition {
|
||||
filter_expr,
|
||||
context: _,
|
||||
function,
|
||||
..
|
||||
} = &task.kind
|
||||
{
|
||||
(filter_expr, function)
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let candidates = match filter.as_ref().map(Filter::from_json) {
|
||||
Some(Ok(Some(filter))) => filter
|
||||
.evaluate(index_wtxn, index)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
None | Some(Ok(None)) => index.documents_ids(index_wtxn)?,
|
||||
Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))),
|
||||
};
|
||||
|
||||
let (original_filter, context, function) = if let Some(Details::DocumentEdition {
|
||||
original_filter,
|
||||
context,
|
||||
function,
|
||||
..
|
||||
}) = task.details
|
||||
{
|
||||
(original_filter, context, function)
|
||||
} else {
|
||||
// In the case of a `documentEdition` the details MUST be set
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
if candidates.is_empty() {
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::DocumentEdition {
|
||||
original_filter,
|
||||
context,
|
||||
function,
|
||||
deleted_documents: Some(0),
|
||||
edited_documents: Some(0),
|
||||
});
|
||||
|
||||
return Ok((vec![task], None));
|
||||
}
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
// candidates not empty => index not empty => a primary key is set
|
||||
let primary_key = index.primary_key(&rtxn)?.unwrap();
|
||||
|
||||
let primary_key =
|
||||
PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
|
||||
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
|
||||
|
||||
let result_count = Ok((candidates.len(), candidates.len())) as Result<_>;
|
||||
|
||||
let mut congestion = None;
|
||||
if task.error.is_none() {
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
|
||||
let candidates_count = candidates.len();
|
||||
progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
|
||||
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
|
||||
let document_changes = pool
|
||||
.install(|| {
|
||||
indexer
|
||||
.into_changes(&primary_key)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))
|
||||
})
|
||||
.unwrap()?;
|
||||
let embedders = index
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
|
||||
progress.update_progress(DocumentEditionProgress::Indexing);
|
||||
congestion = Some(
|
||||
indexer::index(
|
||||
index_wtxn,
|
||||
index,
|
||||
pool,
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
None, // cannot change primary key in DocumentEdition
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
);
|
||||
|
||||
let addition = DocumentAdditionResult {
|
||||
indexed_documents: candidates_count,
|
||||
number_of_documents: index
|
||||
.number_of_documents(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
};
|
||||
|
||||
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||
}
|
||||
|
||||
match result_count {
|
||||
Ok((deleted_documents, edited_documents)) => {
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::DocumentEdition {
|
||||
original_filter,
|
||||
context,
|
||||
function,
|
||||
deleted_documents: Some(deleted_documents),
|
||||
edited_documents: Some(edited_documents),
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
task.status = Status::Failed;
|
||||
task.details = Some(Details::DocumentEdition {
|
||||
original_filter,
|
||||
context,
|
||||
function,
|
||||
deleted_documents: Some(0),
|
||||
edited_documents: Some(0),
|
||||
});
|
||||
task.error = Some(e.into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok((vec![task], congestion))
|
||||
}
|
||||
IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
|
||||
progress.update_progress(DocumentDeletionProgress::RetrievingConfig);
|
||||
|
||||
let mut to_delete = RoaringBitmap::new();
|
||||
let external_documents_ids = index.external_documents_ids();
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
let before = to_delete.len();
|
||||
task.status = Status::Succeeded;
|
||||
|
||||
match &task.kind {
|
||||
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
|
||||
for id in documents_ids {
|
||||
if let Some(id) = external_documents_ids.get(index_wtxn, id)? {
|
||||
to_delete.insert(id);
|
||||
}
|
||||
}
|
||||
let will_be_removed = to_delete.len() - before;
|
||||
task.details = Some(Details::DocumentDeletion {
|
||||
provided_ids: documents_ids.len(),
|
||||
deleted_documents: Some(will_be_removed),
|
||||
});
|
||||
}
|
||||
KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => {
|
||||
let before = to_delete.len();
|
||||
let filter = match Filter::from_json(filter_expr) {
|
||||
Ok(filter) => filter,
|
||||
Err(err) => {
|
||||
// theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(
|
||||
Error::from_milli(err, Some(index_uid.clone())).into(),
|
||||
);
|
||||
None
|
||||
}
|
||||
};
|
||||
if let Some(filter) = filter {
|
||||
let candidates = filter
|
||||
.evaluate(index_wtxn, index)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())));
|
||||
match candidates {
|
||||
Ok(candidates) => to_delete |= candidates,
|
||||
Err(err) => {
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(err.into());
|
||||
}
|
||||
};
|
||||
}
|
||||
let will_be_removed = to_delete.len() - before;
|
||||
if let Some(Details::DocumentDeletionByFilter {
|
||||
original_filter: _,
|
||||
deleted_documents,
|
||||
}) = &mut task.details
|
||||
{
|
||||
*deleted_documents = Some(will_be_removed);
|
||||
} else {
|
||||
// In the case of a `documentDeleteByFilter` the details MUST be set
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
if to_delete.is_empty() {
|
||||
return Ok((tasks, None));
|
||||
}
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
// to_delete not empty => index not empty => primary key set
|
||||
let primary_key = index.primary_key(&rtxn)?.unwrap();
|
||||
|
||||
let primary_key =
|
||||
PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
|
||||
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
|
||||
|
||||
let mut congestion = None;
|
||||
if !tasks.iter().all(|res| res.error.is_some()) {
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
|
||||
progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
|
||||
let mut indexer = indexer::DocumentDeletion::new();
|
||||
let candidates_count = to_delete.len();
|
||||
indexer.delete_documents_by_docids(to_delete);
|
||||
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
|
||||
let embedders = index
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
|
||||
progress.update_progress(DocumentDeletionProgress::Indexing);
|
||||
congestion = Some(
|
||||
indexer::index(
|
||||
index_wtxn,
|
||||
index,
|
||||
pool,
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
None, // document deletion never changes primary key
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
);
|
||||
|
||||
let addition = DocumentAdditionResult {
|
||||
indexed_documents: candidates_count,
|
||||
number_of_documents: index
|
||||
.number_of_documents(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
};
|
||||
|
||||
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||
}
|
||||
|
||||
Ok((tasks, congestion))
|
||||
}
|
||||
IndexOperation::Settings { index_uid, settings, mut tasks } => {
|
||||
progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings);
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
|
||||
|
||||
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
|
||||
let checked_settings = settings.clone().check();
|
||||
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
|
||||
apply_settings_to_builder(&checked_settings, &mut builder);
|
||||
|
||||
// We can apply the status right now and if an update fail later
|
||||
// the whole batch will be marked as failed.
|
||||
task.status = Status::Succeeded;
|
||||
}
|
||||
|
||||
progress.update_progress(SettingsProgress::ApplyTheSettings);
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
Ok((tasks, None))
|
||||
}
|
||||
IndexOperation::DocumentClearAndSetting {
|
||||
index_uid,
|
||||
cleared_tasks,
|
||||
settings,
|
||||
settings_tasks,
|
||||
} => {
|
||||
let (mut import_tasks, _congestion) = self.apply_index_operation(
|
||||
index_wtxn,
|
||||
index,
|
||||
IndexOperation::DocumentClear {
|
||||
index_uid: index_uid.clone(),
|
||||
tasks: cleared_tasks,
|
||||
},
|
||||
progress,
|
||||
)?;
|
||||
|
||||
let (settings_tasks, _congestion) = self.apply_index_operation(
|
||||
index_wtxn,
|
||||
index,
|
||||
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
|
||||
progress,
|
||||
)?;
|
||||
|
||||
let mut tasks = settings_tasks;
|
||||
tasks.append(&mut import_tasks);
|
||||
Ok((tasks, None))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,128 +0,0 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use meilisearch_types::heed::CompactionOption;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::tasks::{Status, Task};
|
||||
use meilisearch_types::{compression, VERSION_FILE_NAME};
|
||||
|
||||
use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(super) fn process_snapshot(
|
||||
&self,
|
||||
progress: Progress,
|
||||
mut tasks: Vec<Task>,
|
||||
) -> Result<Vec<Task>> {
|
||||
progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
|
||||
|
||||
fs::create_dir_all(&self.scheduler.snapshots_path)?;
|
||||
let temp_snapshot_dir = tempfile::tempdir()?;
|
||||
|
||||
// 1. Snapshot the version file.
|
||||
let dst = temp_snapshot_dir.path().join(VERSION_FILE_NAME);
|
||||
fs::copy(&self.scheduler.version_file_path, dst)?;
|
||||
|
||||
// 2. Snapshot the index-scheduler LMDB env
|
||||
//
|
||||
// When we call copy_to_path, LMDB opens a read transaction by itself,
|
||||
// we can't provide our own. It is an issue as we would like to know
|
||||
// the update files to copy but new ones can be enqueued between the copy
|
||||
// of the env and the new transaction we open to retrieve the enqueued tasks.
|
||||
// So we prefer opening a new transaction after copying the env and copy more
|
||||
// update files than not enough.
|
||||
//
|
||||
// Note that there cannot be any update files deleted between those
|
||||
// two read operations as the task processing is synchronous.
|
||||
|
||||
// 2.1 First copy the LMDB env of the index-scheduler
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
|
||||
let dst = temp_snapshot_dir.path().join("tasks");
|
||||
fs::create_dir_all(&dst)?;
|
||||
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
|
||||
// 2.2 Create a read transaction on the index-scheduler
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
// 2.3 Create the update files directory
|
||||
let update_files_dir = temp_snapshot_dir.path().join("update_files");
|
||||
fs::create_dir_all(&update_files_dir)?;
|
||||
|
||||
// 2.4 Only copy the update files of the enqueued tasks
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
|
||||
let enqueued = self.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
|
||||
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
|
||||
progress.update_progress(update_file_progress);
|
||||
for task_id in enqueued {
|
||||
let task =
|
||||
self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Some(content_uuid) = task.content_uuid() {
|
||||
let src = self.queue.file_store.get_update_path(content_uuid);
|
||||
let dst = update_files_dir.join(content_uuid.to_string());
|
||||
fs::copy(src, dst)?;
|
||||
}
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 3. Snapshot every indexes
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
|
||||
let index_mapping = self.index_mapper.index_mapping;
|
||||
let nb_indexes = index_mapping.len(&rtxn)? as u32;
|
||||
|
||||
for (i, result) in index_mapping.iter(&rtxn)?.enumerate() {
|
||||
let (name, uuid) = result?;
|
||||
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
|
||||
name, i as u32, nb_indexes,
|
||||
));
|
||||
let index = self.index_mapper.index(&rtxn, name)?;
|
||||
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
|
||||
fs::create_dir_all(&dst)?;
|
||||
index
|
||||
.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)
|
||||
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
|
||||
}
|
||||
|
||||
drop(rtxn);
|
||||
|
||||
// 4. Snapshot the auth LMDB env
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
|
||||
let dst = temp_snapshot_dir.path().join("auth");
|
||||
fs::create_dir_all(&dst)?;
|
||||
self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
|
||||
// 5. Copy and tarball the flat snapshot
|
||||
progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
|
||||
// 5.1 Find the original name of the database
|
||||
// TODO find a better way to get this path
|
||||
let mut base_path = self.env.path().to_owned();
|
||||
base_path.pop();
|
||||
let db_name = base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms");
|
||||
|
||||
// 5.2 Tarball the content of the snapshot in a tempfile with a .snapshot extension
|
||||
let snapshot_path = self.scheduler.snapshots_path.join(format!("{}.snapshot", db_name));
|
||||
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.scheduler.snapshots_path)?;
|
||||
compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?;
|
||||
let file = temp_snapshot_file.persist(snapshot_path)?;
|
||||
|
||||
// 5.3 Change the permission to make the snapshot readonly
|
||||
let mut permissions = file.metadata()?.permissions();
|
||||
permissions.set_readonly(true);
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
#[allow(clippy::non_octal_unix_permissions)]
|
||||
// rwxrwxrwx
|
||||
permissions.set_mode(0b100100100);
|
||||
}
|
||||
|
||||
file.set_permissions(permissions)?;
|
||||
|
||||
for task in &mut tasks {
|
||||
task.status = Status::Succeeded;
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(super) fn process_upgrade(
|
||||
&self,
|
||||
db_version: (u32, u32, u32),
|
||||
progress: Progress,
|
||||
) -> Result<()> {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::ProcessUpgrade)?;
|
||||
|
||||
enum UpgradeIndex {}
|
||||
let indexes = self.index_names()?;
|
||||
|
||||
for (i, uid) in indexes.iter().enumerate() {
|
||||
progress.update_progress(VariableNameStep::<UpgradeIndex>::new(
|
||||
format!("Upgrading index `{uid}`"),
|
||||
i as u32,
|
||||
indexes.len() as u32,
|
||||
));
|
||||
let index = self.index(uid)?;
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let regen_stats = milli::update::upgrade::upgrade(
|
||||
&mut index_wtxn,
|
||||
&index,
|
||||
db_version,
|
||||
progress.clone(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
if regen_stats {
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_wtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
index_wtxn.commit()?;
|
||||
|
||||
// Release wtxn as soon as possible because it stops us from registering tasks
|
||||
let mut index_schd_wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.store_stats_of(&mut index_schd_wtxn, uid, &stats)?;
|
||||
index_schd_wtxn.commit()?;
|
||||
} else {
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_document_addition.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "bork"
|
||||
}
|
||||
]
|
||||
@@ -1,10 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_document_addition.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "jean bob"
|
||||
}
|
||||
]
|
||||
@@ -1,10 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_document_addition.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "jean bob"
|
||||
}
|
||||
]
|
||||
@@ -1,46 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_document_addition.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "bob 0"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "bob 1"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"doggo": "bob 2"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "bob 3"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"doggo": "bob 4"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"doggo": "bob 5"
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"doggo": "bob 6"
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"doggo": "bob 7"
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"doggo": "bob 8"
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"doggo": "bob 9"
|
||||
}
|
||||
]
|
||||
@@ -1,46 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_document_addition.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "bob 0"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "bob 1"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"doggo": "bob 2"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "bob 3"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"doggo": "bob 4"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"doggo": "bob 5"
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"doggo": "bob 6"
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"doggo": "bob 7"
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"doggo": "bob 8"
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"doggo": "bob 9"
|
||||
}
|
||||
]
|
||||
@@ -1,46 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_document_addition.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "bob 0"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "bob 1"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"doggo": "bob 2"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "bob 3"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"doggo": "bob 4"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"doggo": "bob 5"
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"doggo": "bob 6"
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"doggo": "bob 7"
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"doggo": "bob 8"
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"doggo": "bob 9"
|
||||
}
|
||||
]
|
||||
@@ -1,82 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_document_addition.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
|
||||
3 {uid: 3, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }}
|
||||
4 {uid: 4, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }}
|
||||
5 {uid: 5, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }}
|
||||
6 {uid: 6, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }}
|
||||
7 {uid: 7, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }}
|
||||
8 {uid: 8, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }}
|
||||
9 {uid: 9, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
succeeded [0,1,2,3,4,5,6,7,8,9,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
doggos [0,1,2,3,4,5,6,7,8,9,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
doggos: { number_of_documents: 10, field_distribution: {"doggo": 10, "id": 10} }
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
[timestamp] [5,]
|
||||
[timestamp] [6,]
|
||||
[timestamp] [7,]
|
||||
[timestamp] [8,]
|
||||
[timestamp] [9,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [0,1,2,3,4,5,6,7,8,9,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [0,1,2,3,4,5,6,7,8,9,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"receivedDocuments":10,"indexedDocuments":10}, stats: {"totalNbTasks":10,"status":{"succeeded":10},"types":{"documentAdditionOrUpdate":10},"indexUids":{"doggos":10}}, }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,1,2,3,4,5,6,7,8,9,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
succeeded [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
"documentAdditionOrUpdate" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
doggos [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -1,46 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_document_addition.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "bob 0"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "bob 1"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"doggo": "bob 2"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "bob 3"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"doggo": "bob 4"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"doggo": "bob 5"
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"doggo": "bob 6"
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"doggo": "bob 7"
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"doggo": "bob 8"
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"doggo": "bob 9"
|
||||
}
|
||||
]
|
||||
@@ -1,51 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"settingsUpdate" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
doggos [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -1,10 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "bork"
|
||||
}
|
||||
]
|
||||
@@ -1,109 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
4 {uid: 4, batch_uid: 4, status: succeeded, details: { primary_key: Some("leaves") }, kind: IndexCreation { index_uid: "girafo", primary_key: Some("leaves") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
succeeded [0,1,2,4,]
|
||||
failed [3,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"indexCreation" [1,2,3,4,]
|
||||
"upgradeDatabase" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
catto [1,]
|
||||
doggo [2,3,]
|
||||
girafo [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
catto: { number_of_documents: 0, field_distribution: {} }
|
||||
doggo: { number_of_documents: 0, field_distribution: {} }
|
||||
girafo: { number_of_documents: 0, field_distribution: {} }
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
|
||||
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, }
|
||||
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
|
||||
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
|
||||
4 {uid: 4, details: {"primaryKey":"leaves"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"girafo":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
1 [1,]
|
||||
2 [2,]
|
||||
3 [3,]
|
||||
4 [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
succeeded [0,1,2,4,]
|
||||
failed [3,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
"indexCreation" [1,2,3,4,]
|
||||
"upgradeDatabase" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
catto [1,]
|
||||
doggo [2,3,]
|
||||
girafo [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -1,112 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
4 {uid: 4, batch_uid: 4, status: succeeded, details: { primary_key: Some("leaves") }, kind: IndexCreation { index_uid: "girafo", primary_key: Some("leaves") }}
|
||||
5 {uid: 5, batch_uid: 5, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "types=upgradeDatabase" }, kind: TaskDeletion { query: "types=upgradeDatabase", tasks: RoaringBitmap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
succeeded [1,2,4,5,]
|
||||
failed [3,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"indexCreation" [1,2,3,4,]
|
||||
"taskDeletion" [5,]
|
||||
"upgradeDatabase" []
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
catto [1,]
|
||||
doggo [2,3,]
|
||||
girafo [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
catto: { number_of_documents: 0, field_distribution: {} }
|
||||
doggo: { number_of_documents: 0, field_distribution: {} }
|
||||
girafo: { number_of_documents: 0, field_distribution: {} }
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
[timestamp] [5,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
[timestamp] [5,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
[timestamp] [5,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, }
|
||||
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
|
||||
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
|
||||
4 {uid: 4, details: {"primaryKey":"leaves"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"girafo":1}}, }
|
||||
5 {uid: 5, details: {"matchedTasks":1,"deletedTasks":1,"originalFilter":"types=upgradeDatabase"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"taskDeletion":1},"indexUids":{}}, }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
1 [1,]
|
||||
2 [2,]
|
||||
3 [3,]
|
||||
4 [4,]
|
||||
5 [5,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
succeeded [1,2,4,5,]
|
||||
failed [3,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
"indexCreation" [1,2,3,4,]
|
||||
"taskDeletion" [5,]
|
||||
"upgradeDatabase" []
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
catto [1,]
|
||||
doggo [2,3,]
|
||||
girafo [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
[timestamp] [5,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
[timestamp] [5,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [4,]
|
||||
[timestamp] [5,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -1,50 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"upgradeDatabase" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -1,54 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"indexCreation" [1,]
|
||||
"upgradeDatabase" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
catto [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -1,64 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [1,]
|
||||
failed [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"indexCreation" [1,]
|
||||
"upgradeDatabase" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
catto [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
failed [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
"upgradeDatabase" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -1,67 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [1,2,]
|
||||
failed [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"indexCreation" [1,2,]
|
||||
"upgradeDatabase" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
catto [1,]
|
||||
doggo [2,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
failed [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
"upgradeDatabase" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -1,71 +0,0 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [1,2,3,]
|
||||
succeeded [0,]
|
||||
failed []
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"indexCreation" [1,2,3,]
|
||||
"upgradeDatabase" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
catto [1,]
|
||||
doggo [2,3,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
succeeded [0,]
|
||||
failed []
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
"upgradeDatabase" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -1,931 +0,0 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use big_s::S;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
|
||||
use meilisearch_types::milli::{self};
|
||||
use meilisearch_types::settings::SettingEmbeddingSettings;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{
|
||||
index_creation_task, read_json, replace_document_import_task, sample_documents,
|
||||
};
|
||||
use crate::IndexScheduler;
|
||||
|
||||
#[test]
|
||||
fn insert_task_while_another_task_is_processing() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation");
|
||||
|
||||
// while the task is processing can we register another task?
|
||||
index_scheduler.register(index_creation_task("index_b", "id"), None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None, false)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_task_is_processing() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
assert!(index_scheduler.is_task_processing().unwrap());
|
||||
}
|
||||
|
||||
/// We send a lot of tasks but notify the tasks scheduler only once as
|
||||
/// we send them very fast, we must make sure that they are all processed.
|
||||
#[test]
|
||||
fn process_tasks_inserted_without_new_signal() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_second_task");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_third_task");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn process_tasks_without_autobatching() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]);
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fourth");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn task_deletion_undeleteable() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0);
|
||||
let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1);
|
||||
file0.persist().unwrap();
|
||||
file1.persist().unwrap();
|
||||
|
||||
let to_enqueue = [
|
||||
index_creation_task("catto", "mouse"),
|
||||
replace_document_import_task("catto", None, 0, documents_count0),
|
||||
replace_document_import_task("doggo", Some("bone"), 1, documents_count1),
|
||||
];
|
||||
|
||||
for task in to_enqueue {
|
||||
let _ = index_scheduler.register(task, None, false).unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
|
||||
// here we have registered all the tasks, but the index scheduler
|
||||
// has not progressed at all
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued");
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskDeletion {
|
||||
query: "test_query".to_owned(),
|
||||
tasks: RoaringBitmap::from_iter([0, 1]),
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
// again, no progress made at all, but one more task is registered
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued");
|
||||
|
||||
// now we create the first batch
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
// the task deletion should now be "processing"
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processing");
|
||||
|
||||
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
|
||||
// after the task deletion is processed, no task should actually have been deleted,
|
||||
// because the tasks with ids 0 and 1 were still "enqueued", and thus undeleteable
|
||||
// the "task deletion" task should be marked as "succeeded" and, in its details, the
|
||||
// number of deleted tasks should be 0
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_done");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn task_deletion_deleteable() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0);
|
||||
let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1);
|
||||
file0.persist().unwrap();
|
||||
file1.persist().unwrap();
|
||||
|
||||
let to_enqueue = [
|
||||
replace_document_import_task("catto", None, 0, documents_count0),
|
||||
replace_document_import_task("doggo", Some("bone"), 1, documents_count1),
|
||||
];
|
||||
|
||||
for task in to_enqueue {
|
||||
let _ = index_scheduler.register(task, None, false).unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
// first addition of documents should be successful
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed");
|
||||
|
||||
// Now we delete the first task
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskDeletion {
|
||||
query: "test_query".to_owned(),
|
||||
tasks: RoaringBitmap::from_iter([0]),
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn task_deletion_delete_same_task_twice() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0);
|
||||
let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1);
|
||||
file0.persist().unwrap();
|
||||
file1.persist().unwrap();
|
||||
|
||||
let to_enqueue = [
|
||||
replace_document_import_task("catto", None, 0, documents_count0),
|
||||
replace_document_import_task("doggo", Some("bone"), 1, documents_count1),
|
||||
];
|
||||
|
||||
for task in to_enqueue {
|
||||
let _ = index_scheduler.register(task, None, false).unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
// first addition of documents should be successful
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed");
|
||||
|
||||
// Now we delete the first task multiple times in a row
|
||||
for _ in 0..2 {
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskDeletion {
|
||||
query: "test_query".to_owned(),
|
||||
tasks: RoaringBitmap::from_iter([0]),
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn document_addition_and_index_deletion() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let content = r#"
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "bob"
|
||||
}"#;
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap();
|
||||
let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
|
||||
file.persist().unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: Some(S("id")),
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
|
||||
handle.advance_one_successful_batch(); // The index creation.
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "before_index_creation");
|
||||
handle.advance_one_successful_batch(); // // after the execution of the two tasks in a single batch.
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn do_not_batch_task_of_different_indexes() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
let index_names = ["doggos", "cattos", "girafos"];
|
||||
|
||||
for name in index_names {
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: name.to_string(), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
|
||||
for name in index_names {
|
||||
index_scheduler
|
||||
.register(KindWithContent::DocumentClear { index_uid: name.to_string() }, None, false)
|
||||
.unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
|
||||
for _ in 0..(index_names.len() * 2) {
|
||||
handle.advance_one_successful_batch();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn swap_indexes() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let to_enqueue = [
|
||||
index_creation_task("a", "id"),
|
||||
index_creation_task("b", "id"),
|
||||
index_creation_task("c", "id"),
|
||||
index_creation_task("d", "id"),
|
||||
];
|
||||
|
||||
for task in to_enqueue {
|
||||
let _ = index_scheduler.register(task, None, false).unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_a");
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_b");
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_c");
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_d");
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexSwap {
|
||||
swaps: vec![
|
||||
IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) },
|
||||
IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) },
|
||||
],
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered");
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }],
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_processed");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed");
|
||||
|
||||
index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }, None, false).unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn swap_indexes_errors() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let to_enqueue = [
|
||||
index_creation_task("a", "id"),
|
||||
index_creation_task("b", "id"),
|
||||
index_creation_task("c", "id"),
|
||||
index_creation_task("d", "id"),
|
||||
];
|
||||
|
||||
for task in to_enqueue {
|
||||
let _ = index_scheduler.register(task, None, false).unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
handle.advance_n_successful_batches(4);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_index_creation");
|
||||
|
||||
let first_snap = snapshot_index_scheduler(&index_scheduler);
|
||||
snapshot!(first_snap, name: "initial_tasks_processed");
|
||||
|
||||
let err = index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexSwap {
|
||||
swaps: vec![
|
||||
IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) },
|
||||
IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) },
|
||||
],
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap_err();
|
||||
snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. `a`, `b` were specified several times.");
|
||||
|
||||
let second_snap = snapshot_index_scheduler(&index_scheduler);
|
||||
assert_eq!(first_snap, second_snap);
|
||||
|
||||
// Index `e` does not exist, but we don't check its existence yet
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexSwap {
|
||||
swaps: vec![
|
||||
IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) },
|
||||
IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) },
|
||||
IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) },
|
||||
],
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
// Now the first swap should have an error message saying `e` and `f` do not exist
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_failed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn document_addition_and_index_deletion_on_unexisting_index() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let content = r#"
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "bob"
|
||||
}"#;
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap();
|
||||
let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
|
||||
file.persist().unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: Some(S("id")),
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler
|
||||
.register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false)
|
||||
.unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler));
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cancel_enqueued_task() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0);
|
||||
file0.persist().unwrap();
|
||||
|
||||
let to_enqueue = [
|
||||
replace_document_import_task("catto", None, 0, documents_count0),
|
||||
KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_owned(),
|
||||
tasks: RoaringBitmap::from_iter([0]),
|
||||
},
|
||||
];
|
||||
for task in to_enqueue {
|
||||
let _ = index_scheduler.register(task, None, false).unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued");
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cancel_succeeded_task() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0);
|
||||
file0.persist().unwrap();
|
||||
|
||||
let _ = index_scheduler
|
||||
.register(replace_document_import_task("catto", None, 0, documents_count0), None, false)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processed");
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_owned(),
|
||||
tasks: RoaringBitmap::from_iter([0]),
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cancel_processing_task() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0);
|
||||
file0.persist().unwrap();
|
||||
|
||||
let _ = index_scheduler
|
||||
.register(replace_document_import_task("catto", None, 0, documents_count0), None, false)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
|
||||
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processing");
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_owned(),
|
||||
tasks: RoaringBitmap::from_iter([0]),
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_task_registered");
|
||||
// Now we check that we can reach the AbortedIndexation error handling
|
||||
handle.advance_till([AbortedIndexation]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation");
|
||||
|
||||
// handle.advance_till([Start, BatchCreated, BeforeProcessing, AfterProcessing]);
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cancel_mix_of_tasks() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0);
|
||||
file0.persist().unwrap();
|
||||
let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1);
|
||||
file1.persist().unwrap();
|
||||
let (file2, documents_count2) = sample_documents(&index_scheduler, 2, 2);
|
||||
file2.persist().unwrap();
|
||||
|
||||
let to_enqueue = [
|
||||
replace_document_import_task("catto", None, 0, documents_count0),
|
||||
replace_document_import_task("beavero", None, 1, documents_count1),
|
||||
replace_document_import_task("wolfo", None, 2, documents_count2),
|
||||
];
|
||||
for task in to_enqueue {
|
||||
let _ = index_scheduler.register(task, None, false).unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_processed");
|
||||
|
||||
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_owned(),
|
||||
tasks: RoaringBitmap::from_iter([0, 1, 2]),
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued");
|
||||
|
||||
handle.advance_till([AbortedIndexation]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_settings_update() {
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
use milli::update::Setting;
|
||||
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let mut new_settings: Box<Settings<Unchecked>> = Box::default();
|
||||
let mut embedders = BTreeMap::default();
|
||||
let embedding_settings = milli::vector::settings::EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::Rest),
|
||||
api_key: Setting::Set(S("My super secret")),
|
||||
url: Setting::Set(S("http://localhost:7777")),
|
||||
dimensions: Setting::Set(4),
|
||||
request: Setting::Set(serde_json::json!("{{text}}")),
|
||||
response: Setting::Set(serde_json::json!("{{embedding}}")),
|
||||
..Default::default()
|
||||
};
|
||||
embedders
|
||||
.insert(S("default"), SettingEmbeddingSettings { inner: Setting::Set(embedding_settings) });
|
||||
new_settings.embedders = Setting::Set(embedders);
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings,
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task");
|
||||
|
||||
{
|
||||
let rtxn = index_scheduler.read_txn().unwrap();
|
||||
let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap();
|
||||
let task = meilisearch_types::task_view::TaskView::from_task(&task);
|
||||
insta::assert_json_snapshot!(task.details);
|
||||
}
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed");
|
||||
|
||||
{
|
||||
let rtxn = index_scheduler.read_txn().unwrap();
|
||||
let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap();
|
||||
let task = meilisearch_types::task_view::TaskView::from_task(&task);
|
||||
insta::assert_json_snapshot!(task.details);
|
||||
}
|
||||
|
||||
// has everything being pushed successfully in milli?
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
|
||||
insta::assert_snapshot!(name, @"default");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_json_snapshot!(config.embedder_options);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_new() {
|
||||
crate::IndexScheduler::test(true, vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_get_stats() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 3,
|
||||
"failed": 0,
|
||||
"processing": 0,
|
||||
"succeeded": 0
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0,
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 2,
|
||||
"failed": 0,
|
||||
"processing": 1,
|
||||
"succeeded": 0
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0,
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 1,
|
||||
"failed": 0,
|
||||
"processing": 1,
|
||||
"succeeded": 1
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0,
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 0,
|
||||
"failed": 0,
|
||||
"processing": 1,
|
||||
"succeeded": 2
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0,
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cancel_processing_dump() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
|
||||
let dump_cancellation = KindWithContent::TaskCancelation {
|
||||
query: "cancel dump".to_owned(),
|
||||
tasks: RoaringBitmap::from_iter([0]),
|
||||
};
|
||||
let _ = index_scheduler.register(dump_creation, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
|
||||
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
|
||||
|
||||
let _ = index_scheduler.register(dump_cancellation, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
|
||||
|
||||
snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn create_and_list_index() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let index_creation =
|
||||
KindWithContent::IndexCreation { index_uid: S("kefir"), primary_key: None };
|
||||
let _ = index_scheduler.register(index_creation, None, false).unwrap();
|
||||
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
|
||||
// The index creation has not been started, the index should not exists
|
||||
|
||||
let err = index_scheduler.index("kefir").map(|_| ()).unwrap_err();
|
||||
snapshot!(err, @"Index `kefir` not found.");
|
||||
let empty = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
|
||||
snapshot!(format!("{empty:?}"), @"(0, [])");
|
||||
|
||||
// After advancing just once the index should've been created, the wtxn has been released and commited
|
||||
// but the indexUpdate task has not been processed yet
|
||||
handle.advance_till([InsideProcessBatch]);
|
||||
|
||||
index_scheduler.index("kefir").unwrap();
|
||||
let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
|
||||
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###"
|
||||
[
|
||||
1,
|
||||
[
|
||||
[
|
||||
"kefir",
|
||||
{
|
||||
"documents_database_stats": {
|
||||
"numberOfEntries": 0,
|
||||
"totalKeySize": 0,
|
||||
"totalValueSize": 0
|
||||
},
|
||||
"database_size": "[bytes]",
|
||||
"number_of_embeddings": 0,
|
||||
"number_of_embedded_documents": 0,
|
||||
"used_database_size": "[bytes]",
|
||||
"primary_key": null,
|
||||
"field_distribution": {},
|
||||
"created_at": "[date]",
|
||||
"updated_at": "[date]"
|
||||
}
|
||||
]
|
||||
]
|
||||
]
|
||||
"###);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,840 +0,0 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use big_s::S;
|
||||
use insta::assert_json_snapshot;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
|
||||
use meilisearch_types::milli::{self, obkv_to_json};
|
||||
use meilisearch_types::settings::{SettingEmbeddingSettings, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use milli::update::IndexDocumentsMethod::*;
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
use crate::test_utils::read_json;
|
||||
use crate::IndexScheduler;
|
||||
|
||||
#[test]
|
||||
fn import_vectors() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let mut new_settings: Box<Settings<Unchecked>> = Box::default();
|
||||
let mut embedders = BTreeMap::default();
|
||||
let embedding_settings = milli::vector::settings::EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::Rest),
|
||||
api_key: Setting::Set(S("My super secret")),
|
||||
url: Setting::Set(S("http://localhost:7777")),
|
||||
dimensions: Setting::Set(384),
|
||||
request: Setting::Set(serde_json::json!("{{text}}")),
|
||||
response: Setting::Set(serde_json::json!("{{embedding}}")),
|
||||
..Default::default()
|
||||
};
|
||||
embedders.insert(
|
||||
S("A_fakerest"),
|
||||
SettingEmbeddingSettings { inner: Setting::Set(embedding_settings) },
|
||||
);
|
||||
|
||||
let embedding_settings = milli::vector::settings::EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
|
||||
model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
|
||||
revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
|
||||
document_template: Setting::Set(S("{{doc.doggo}} the {{doc.breed}} best doggo")),
|
||||
..Default::default()
|
||||
};
|
||||
embedders.insert(
|
||||
S("B_small_hf"),
|
||||
SettingEmbeddingSettings { inner: Setting::Set(embedding_settings) },
|
||||
);
|
||||
|
||||
new_settings.embedders = Setting::Set(embedders);
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings,
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors");
|
||||
|
||||
{
|
||||
let rtxn = index_scheduler.read_txn().unwrap();
|
||||
let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap();
|
||||
let task = meilisearch_types::task_view::TaskView::from_task(&task);
|
||||
insta::assert_json_snapshot!(task.details);
|
||||
}
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors");
|
||||
|
||||
{
|
||||
let rtxn = index_scheduler.read_txn().unwrap();
|
||||
let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap();
|
||||
let task = meilisearch_types::task_view::TaskView::from_task(&task);
|
||||
insta::assert_json_snapshot!(task.details);
|
||||
}
|
||||
|
||||
let (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) = {
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
|
||||
configs.get(0).unwrap();
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_json_snapshot!(fakerest_config.embedder_options);
|
||||
let fakerest_name = name.clone();
|
||||
|
||||
let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
|
||||
configs.get(1).unwrap();
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
|
||||
let simple_hf_name = name.clone();
|
||||
|
||||
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
|
||||
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
|
||||
let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap();
|
||||
let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap();
|
||||
let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap();
|
||||
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
|
||||
};
|
||||
|
||||
// add one doc, specifying vectors
|
||||
|
||||
let doc = serde_json::json!(
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "Intel",
|
||||
"breed": "beagle",
|
||||
"_vectors": {
|
||||
&fakerest_name: {
|
||||
// this will never trigger regeneration, which is good because we can't actually generate with
|
||||
// this embedder
|
||||
"regenerate": false,
|
||||
"embeddings": beagle_embed,
|
||||
},
|
||||
&simple_hf_name: {
|
||||
// this will be regenerated on updates
|
||||
"regenerate": true,
|
||||
"embeddings": lab_embed,
|
||||
},
|
||||
"noise": [0.1, 0.2, 0.3]
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0u128).unwrap();
|
||||
let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap();
|
||||
assert_eq!(documents_count, 1);
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: Some(S("id")),
|
||||
method: UpdateDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "adding Intel succeeds");
|
||||
|
||||
// check embeddings
|
||||
{
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
// Ensure the document have been inserted into the relevant bitamp
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
|
||||
configs.get(0).unwrap();
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
||||
|
||||
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let doc = obkv_to_json(
|
||||
&[
|
||||
fields_ids_map.id("doggo").unwrap(),
|
||||
fields_ids_map.id("breed").unwrap(),
|
||||
fields_ids_map.id("_vectors").unwrap(),
|
||||
],
|
||||
&fields_ids_map,
|
||||
doc,
|
||||
)
|
||||
.unwrap();
|
||||
assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"});
|
||||
}
|
||||
|
||||
// update the doc, specifying vectors
|
||||
|
||||
let doc = serde_json::json!(
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "kefir",
|
||||
"breed": "patou",
|
||||
}
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(1u128).unwrap();
|
||||
let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap();
|
||||
assert_eq!(documents_count, 1);
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: None,
|
||||
method: UpdateDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds");
|
||||
|
||||
{
|
||||
// check embeddings
|
||||
{
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
// Ensure the document have been inserted into the relevant bitamp
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
|
||||
configs.get(0).unwrap();
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||
|
||||
// automatically changed to patou because set to regenerate
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
|
||||
// remained beagle
|
||||
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
||||
|
||||
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let doc = obkv_to_json(
|
||||
&[
|
||||
fields_ids_map.id("doggo").unwrap(),
|
||||
fields_ids_map.id("breed").unwrap(),
|
||||
fields_ids_map.id("_vectors").unwrap(),
|
||||
],
|
||||
&fields_ids_map,
|
||||
doc,
|
||||
)
|
||||
.unwrap();
|
||||
assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn import_vectors_first_and_embedder_later() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let content = serde_json::json!(
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "kefir",
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "intel",
|
||||
"_vectors": {
|
||||
"my_doggo_embedder": vec![1; 384],
|
||||
"unknown embedder": vec![1, 2, 3],
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"doggo": "max",
|
||||
"_vectors": {
|
||||
"my_doggo_embedder": {
|
||||
"regenerate": false,
|
||||
"embeddings": vec![2; 384],
|
||||
},
|
||||
"unknown embedder": vec![4, 5],
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "marcel",
|
||||
"_vectors": {
|
||||
"my_doggo_embedder": {
|
||||
"regenerate": true,
|
||||
"embeddings": vec![3; 384],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"doggo": "sora",
|
||||
"_vectors": {
|
||||
"my_doggo_embedder": {
|
||||
"regenerate": true,
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0_u128).unwrap();
|
||||
let documents_count =
|
||||
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap();
|
||||
snapshot!(documents_count, @"5");
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: None,
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push");
|
||||
|
||||
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||
embedders: Setting::Set(maplit::btreemap! {
|
||||
S("my_doggo_embedder") => SettingEmbeddingSettings { inner: Setting::Set(EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
|
||||
model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
|
||||
revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
|
||||
document_template: Setting::Set(S("{{doc.doggo}}")),
|
||||
..Default::default()
|
||||
}) }
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings: Box::new(setting),
|
||||
is_deletion: false,
|
||||
allow_index_creation: false,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
handle.advance_one_successful_batch();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
// the all the vectors linked to the new specified embedder have been removed
|
||||
// Only the unknown embedders stays in the document DB
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
// even though we specified the vector for the ID 3, it shouldn't be marked
|
||||
// as user provided since we explicitely marked it as NOT user provided.
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "my_doggo_embedder",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: HuggingFace(
|
||||
EmbedderOptions {
|
||||
model: "sentence-transformers/all-MiniLM-L6-v2",
|
||||
revision: Some(
|
||||
"e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
),
|
||||
distribution: None,
|
||||
pooling: UseModel,
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{{doc.doggo}}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
},
|
||||
quantized: None,
|
||||
},
|
||||
user_provided: RoaringBitmap<[1, 2]>,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
assert!(!embedding.is_empty(), "{embedding:?}");
|
||||
|
||||
// the document with the id 3 should keep its original embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embeddings = &embeddings["my_doggo_embedder"];
|
||||
|
||||
snapshot!(embeddings.len(), @"1");
|
||||
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
|
||||
|
||||
// If we update marcel it should regenerate its embedding automatically
|
||||
|
||||
let content = serde_json::json!(
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "marvel",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"doggo": "sorry",
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(1_u128).unwrap();
|
||||
let documents_count =
|
||||
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap();
|
||||
snapshot!(documents_count, @"2");
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: None,
|
||||
method: UpdateDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
// the document with the id 3 should have its original embedding updated
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
||||
let doc = index.documents(&rtxn, Some(docid)).unwrap()[0];
|
||||
let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap();
|
||||
snapshot!(json_string!(doc), @r###"
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "marvel"
|
||||
}
|
||||
"###);
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
|
||||
assert!(!embedding.is_empty());
|
||||
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
|
||||
|
||||
// the document with the id 4 should generate an embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
|
||||
assert!(!embedding.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delete_document_containing_vector() {
|
||||
// 1. Add an embedder
|
||||
// 2. Push two documents containing a simple vector
|
||||
// 3. Delete the first document
|
||||
// 4. The user defined roaring bitmap shouldn't contains the id of the first document anymore
|
||||
// 5. Clear the index
|
||||
// 6. The user defined roaring bitmap shouldn't contains the id of the second document
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||
embedders: Setting::Set(maplit::btreemap! {
|
||||
S("manual") => SettingEmbeddingSettings { inner: Setting::Set(EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided),
|
||||
dimensions: Setting::Set(3),
|
||||
..Default::default()
|
||||
}) }
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings: Box::new(setting),
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
let content = serde_json::json!(
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "kefir",
|
||||
"_vectors": {
|
||||
"manual": vec![0, 0, 0],
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "intel",
|
||||
"_vectors": {
|
||||
"manual": vec![1, 1, 1],
|
||||
}
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0_u128).unwrap();
|
||||
let documents_count =
|
||||
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap();
|
||||
snapshot!(documents_count, @"2");
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: None,
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: false,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentDeletion {
|
||||
index_uid: S("doggos"),
|
||||
documents_ids: vec![S("1")],
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
},
|
||||
quantized: None,
|
||||
},
|
||||
user_provided: RoaringBitmap<[0]>,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["manual"];
|
||||
assert!(!embedding.is_empty(), "{embedding:?}");
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
},
|
||||
quantized: None,
|
||||
},
|
||||
user_provided: RoaringBitmap<[]>,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delete_embedder_with_user_provided_vectors() {
|
||||
// 1. Add two embedders
|
||||
// 2. Push two documents containing a simple vector
|
||||
// 3. The documents must not contain the vectors after the update as they are in the vectors db
|
||||
// 3. Delete the embedders
|
||||
// 4. The documents contain the vectors again
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||
embedders: Setting::Set(maplit::btreemap! {
|
||||
S("manual") => SettingEmbeddingSettings { inner: Setting::Set(EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided),
|
||||
dimensions: Setting::Set(3),
|
||||
..Default::default()
|
||||
}) },
|
||||
S("my_doggo_embedder") => SettingEmbeddingSettings { inner: Setting::Set(EmbeddingSettings {
|
||||
source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
|
||||
model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
|
||||
revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
|
||||
document_template: Setting::Set(S("{{doc.doggo}}")),
|
||||
..Default::default()
|
||||
}) },
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings: Box::new(setting),
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
let content = serde_json::json!(
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"doggo": "kefir",
|
||||
"_vectors": {
|
||||
"manual": vec![0, 0, 0],
|
||||
"my_doggo_embedder": vec![1; 384],
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "intel",
|
||||
"_vectors": {
|
||||
"manual": vec![1, 1, 1],
|
||||
}
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0_u128).unwrap();
|
||||
let documents_count =
|
||||
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap();
|
||||
snapshot!(documents_count, @"2");
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: None,
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: false,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
{
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###);
|
||||
}
|
||||
|
||||
{
|
||||
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||
embedders: Setting::Set(maplit::btreemap! {
|
||||
S("manual") => SettingEmbeddingSettings { inner: Setting::Reset },
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings: Box::new(setting),
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
}
|
||||
|
||||
{
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
|
||||
}
|
||||
|
||||
{
|
||||
let setting = meilisearch_types::settings::Settings::<Unchecked> {
|
||||
embedders: Setting::Reset,
|
||||
..Default::default()
|
||||
};
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings: Box::new(setting),
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
}
|
||||
|
||||
{
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// FIXME: redaction
|
||||
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###);
|
||||
}
|
||||
}
|
||||
@@ -1,327 +0,0 @@
|
||||
use std::time::Instant;
|
||||
|
||||
use big_s::S;
|
||||
use meili_snap::snapshot;
|
||||
use meilisearch_types::milli::obkv_to_json;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::FilterableAttributesRule;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent};
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, read_json, FailureLocation};
|
||||
use crate::IndexScheduler;
|
||||
|
||||
#[test]
|
||||
fn fail_in_process_batch_for_index_creation() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register");
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
|
||||
// Still in the first iteration
|
||||
assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fail_in_process_batch_for_document_addition() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let content = r#"
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "bob"
|
||||
}"#;
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap();
|
||||
let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
|
||||
file.persist().unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: Some(S("id")),
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
snapshot!(
|
||||
snapshot_index_scheduler(&index_scheduler),
|
||||
name: "document_addition_batch_created"
|
||||
);
|
||||
|
||||
handle.advance_till([ProcessBatchFailed, AfterProcessing]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_failed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fail_in_update_task_after_process_batch_success_for_document_addition() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(
|
||||
true,
|
||||
vec![(1, FailureLocation::UpdatingTaskAfterProcessBatchSuccess { task_uid: 0 })],
|
||||
);
|
||||
|
||||
let content = r#"
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "bob"
|
||||
}"#;
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap();
|
||||
let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
|
||||
file.persist().unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: Some(S("id")),
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
|
||||
handle.advance_till([Start]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_succeeded_but_index_scheduler_not_updated");
|
||||
|
||||
handle.advance_till([BatchCreated, InsideProcessBatch, ProcessBatchSucceeded]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_succeeded");
|
||||
|
||||
// At this point the next time the scheduler will try to progress it should encounter
|
||||
// a critical failure and have to wait for 1s before retrying anything.
|
||||
|
||||
let before_failure = Instant::now();
|
||||
handle.advance_till([Start]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_to_commit");
|
||||
let failure_duration = before_failure.elapsed();
|
||||
assert!(failure_duration.as_millis() >= 1000);
|
||||
|
||||
handle.advance_till([BatchCreated, InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_successfully_processed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fail_in_process_batch_for_document_deletion() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
let mut new_settings: Box<Settings<Unchecked>> = Box::default();
|
||||
new_settings.filterable_attributes =
|
||||
Setting::Set(vec![FilterableAttributesRule::Field(S("catto"))]);
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: S("doggos"),
|
||||
new_settings,
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let content = r#"[
|
||||
{ "id": 1, "doggo": "jean bob" },
|
||||
{ "id": 2, "catto": "jorts" },
|
||||
{ "id": 3, "doggo": "bork" }
|
||||
]"#;
|
||||
|
||||
let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap();
|
||||
let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
|
||||
file.persist().unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: Some(S("id")),
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_setting_and_document_addition");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_settings");
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_documents");
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentDeletion {
|
||||
index_uid: S("doggos"),
|
||||
documents_ids: vec![S("1")],
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
// This one should not be catched by Meilisearch but it's still nice to handle it because if one day we break the filters it could happens
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
index_uid: S("doggos"),
|
||||
filter_expr: serde_json::json!(true),
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
// Should fail because the ids are not filterable
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
index_uid: S("doggos"),
|
||||
filter_expr: serde_json::json!("id = 2"),
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
index_uid: S("doggos"),
|
||||
filter_expr: serde_json::json!("catto EXISTS"),
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_document_deletions");
|
||||
|
||||
// Everything should be batched together
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_documents");
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn panic_in_process_batch_for_index_creation() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(1, FailureLocation::PanicInsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
|
||||
handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]);
|
||||
|
||||
// Still in the first iteration
|
||||
assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1);
|
||||
// No matter what happens in process_batch, the index_scheduler should be internally consistent
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn upgrade_failure() {
|
||||
// By starting the index-scheduler at the v1.12.0 an upgrade task should be automatically enqueued
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test_with_custom_config(vec![(1, FailureLocation::ProcessUpgrade)], |_| {
|
||||
Some((1, 12, 0))
|
||||
});
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "register_automatic_upgrade_task");
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task_while_the_upgrade_task_is_enqueued");
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "upgrade_task_failed");
|
||||
|
||||
// We can still register tasks
|
||||
let kind = index_creation_task("doggo", "bone");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
// But the scheduler is down and won't process anything ever again
|
||||
handle.scheduler_is_down();
|
||||
|
||||
// =====> After a restart is it still working as expected?
|
||||
let (index_scheduler, mut handle) =
|
||||
handle.restart(index_scheduler, true, vec![(1, FailureLocation::ProcessUpgrade)], |_| {
|
||||
Some((1, 12, 0)) // the upgrade task should be rerun automatically and nothing else should be enqueued
|
||||
});
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "upgrade_task_failed_again");
|
||||
// We can still register tasks
|
||||
let kind = index_creation_task("doggo", "bone");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
// And the scheduler is still down and won't process anything ever again
|
||||
handle.scheduler_is_down();
|
||||
|
||||
// =====> After a rerestart and without failure can we upgrade the indexes and process the tasks
|
||||
let (index_scheduler, mut handle) =
|
||||
handle.restart(index_scheduler, true, vec![], |_| Some((1, 12, 0)));
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "upgrade_task_succeeded");
|
||||
// We can still register tasks
|
||||
let kind = index_creation_task("girafo", "leaves");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
// The scheduler is up and running
|
||||
handle.advance_one_successful_batch();
|
||||
handle.advance_one_successful_batch();
|
||||
handle.advance_one_failed_batch(); // doggo already exists
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_everything");
|
||||
|
||||
let (upgrade_tasks_ids, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&crate::Query { types: Some(vec![Kind::UpgradeDatabase]), ..Default::default() },
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
// When deleting the single upgrade task it should remove the associated batch
|
||||
let _task = index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskDeletion {
|
||||
query: String::from("types=upgradeDatabase"),
|
||||
tasks: upgrade_tasks_ids,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_upgrade_tasks");
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
source: index-scheduler/src/lib.rs
|
||||
expression: doc
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"doggo": "Intel",
|
||||
@@ -1,7 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
source: index-scheduler/src/lib.rs
|
||||
expression: task.details
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"embedders": {
|
||||
@@ -1,7 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
source: index-scheduler/src/lib.rs
|
||||
expression: doc
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"doggo": "kefir",
|
||||
@@ -1,7 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
source: index-scheduler/src/lib.rs
|
||||
expression: fakerest_config.embedder_options
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"Rest": {
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user