mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-06-08 21:25:37 +00:00
Merge branch 'main' into key-actions-to-bitflags
This commit is contained in:
commit
c832797416
14
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
14
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
@ -22,6 +22,20 @@ Related product discussion:
|
||||
|
||||
<!---If necessary, create a list with technical/product steps-->
|
||||
|
||||
### Are you modifying a database?
|
||||
- [ ] If not, add the `no db change` label to your PR, and you're good to merge.
|
||||
- [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining you what to do.
|
||||
|
||||
### Reminders when modifying the API
|
||||
|
||||
- [ ] Update the openAPI file with utoipa:
|
||||
- [ ] If a new module has been introduced, create a new structure deriving [the OpenAPI proc-macro](https://docs.rs/utoipa/latest/utoipa/derive.OpenApi.html) and nest it in the main [openAPI structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L64-L78).
|
||||
- [ ] If a new route has been introduced, add the [path decorator](https://docs.rs/utoipa/latest/utoipa/attr.path.html) to it and add the route at the top of the file in its openAPI structure.
|
||||
- [ ] If a structure which is deserialized or serialized in the API has been introduced or modified, it must derive the [`schema`](https://docs.rs/utoipa/latest/utoipa/macro.schema.html) or the [`IntoParams`](https://docs.rs/utoipa/latest/utoipa/derive.IntoParams.html) proc-macro.
|
||||
If it's a **new** structure you must also add it to the big list of structures [in the main `OpenApi` structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L88).
|
||||
- [ ] Once everything is done, start Meilisearch with the swagger flag: `cargo run --features swagger`, open `http://localhost:7700/scalar` on your browser, and ensure everything works as expected.
|
||||
- For more info, refer to [this presentation](https://pitch.com/v/generating-the-openapi-file-jrn3nh).
|
||||
|
||||
### Reminders when modifying the Setting API
|
||||
|
||||
<!--- Special steps to remind when adding a new index setting -->
|
||||
|
7
.github/workflows/bench-manual.yml
vendored
7
.github/workflows/bench-manual.yml
vendored
@ -4,9 +4,9 @@ on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
workload:
|
||||
description: 'The path to the workloads to execute (workloads/...)'
|
||||
description: "The path to the workloads to execute (workloads/...)"
|
||||
required: true
|
||||
default: 'workloads/movies.json'
|
||||
default: "workloads/movies.json"
|
||||
|
||||
env:
|
||||
WORKLOAD_NAME: ${{ github.event.inputs.workload }}
|
||||
@ -18,11 +18,10 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
- name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
|
||||
run: |
|
||||
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Manual [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- ${WORKLOAD_NAME}
|
||||
|
||||
|
2
.github/workflows/bench-pr.yml
vendored
2
.github/workflows/bench-pr.yml
vendored
@ -66,7 +66,7 @@ jobs:
|
||||
fetch-depth: 0 # fetch full history to be able to get main commit sha
|
||||
ref: ${{ steps.comment-branch.outputs.head_ref }}
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
3
.github/workflows/bench-push-indexing.yml
vendored
3
.github/workflows/bench-push-indexing.yml
vendored
@ -12,7 +12,7 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
@ -20,4 +20,3 @@ jobs:
|
||||
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
|
||||
run: |
|
||||
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Push on `main` [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- workloads/*.json
|
||||
|
||||
|
8
.github/workflows/benchmarks-manual.yml
vendored
8
.github/workflows/benchmarks-manual.yml
vendored
@ -4,9 +4,9 @@ on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dataset_name:
|
||||
description: 'The name of the dataset used to benchmark (search_songs, search_wiki, search_geo or indexing)'
|
||||
description: "The name of the dataset used to benchmark (search_songs, search_wiki, search_geo or indexing)"
|
||||
required: false
|
||||
default: 'search_songs'
|
||||
default: "search_songs"
|
||||
|
||||
env:
|
||||
BENCH_NAME: ${{ github.event.inputs.dataset_name }}
|
||||
@ -18,7 +18,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
@ -67,7 +67,7 @@ jobs:
|
||||
out_dir: critcmp_results
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
- name: "README: compare with another benchmark"
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
|
2
.github/workflows/benchmarks-pr.yml
vendored
2
.github/workflows/benchmarks-pr.yml
vendored
@ -44,7 +44,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
@ -16,7 +16,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
@ -69,7 +69,7 @@ jobs:
|
||||
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
- name: "README: compare with another benchmark"
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
|
@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
@ -68,7 +68,7 @@ jobs:
|
||||
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
- name: "README: compare with another benchmark"
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
|
@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
@ -68,7 +68,7 @@ jobs:
|
||||
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
- name: "README: compare with another benchmark"
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
|
@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
@ -68,7 +68,7 @@ jobs:
|
||||
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
- name: "README: compare with another benchmark"
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
|
100
.github/workflows/check-valid-milestone.yml
vendored
Normal file
100
.github/workflows/check-valid-milestone.yml
vendored
Normal file
@ -0,0 +1,100 @@
|
||||
name: PR Milestone Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, reopened, edited, synchronize, milestoned, demilestoned]
|
||||
branches:
|
||||
- "main"
|
||||
- "release-v*.*.*"
|
||||
|
||||
jobs:
|
||||
check-milestone:
|
||||
name: Check PR Milestone
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Validate PR milestone
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
// Get PR number directly from the event payload
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
|
||||
// Get PR details
|
||||
const { data: prData } = await github.rest.pulls.get({
|
||||
owner: 'meilisearch',
|
||||
repo: 'meilisearch',
|
||||
pull_number: prNumber
|
||||
});
|
||||
|
||||
// Get base branch name
|
||||
const baseBranch = prData.base.ref;
|
||||
console.log(`Base branch: ${baseBranch}`);
|
||||
|
||||
// Get PR milestone
|
||||
const prMilestone = prData.milestone;
|
||||
if (!prMilestone) {
|
||||
core.setFailed('PR must have a milestone assigned');
|
||||
return;
|
||||
}
|
||||
console.log(`PR milestone: ${prMilestone.title}`);
|
||||
|
||||
// Validate milestone format: vx.y.z
|
||||
const milestoneRegex = /^v\d+\.\d+\.\d+$/;
|
||||
if (!milestoneRegex.test(prMilestone.title)) {
|
||||
core.setFailed(`Milestone "${prMilestone.title}" does not follow the required format vx.y.z`);
|
||||
return;
|
||||
}
|
||||
|
||||
// For main branch PRs, check if the milestone is the highest one
|
||||
if (baseBranch === 'main') {
|
||||
// Get all milestones
|
||||
const { data: milestones } = await github.rest.issues.listMilestones({
|
||||
owner: 'meilisearch',
|
||||
repo: 'meilisearch',
|
||||
state: 'open',
|
||||
sort: 'due_on',
|
||||
direction: 'desc'
|
||||
});
|
||||
|
||||
// Sort milestones by version number (vx.y.z)
|
||||
const sortedMilestones = milestones
|
||||
.filter(m => milestoneRegex.test(m.title))
|
||||
.sort((a, b) => {
|
||||
const versionA = a.title.substring(1).split('.').map(Number);
|
||||
const versionB = b.title.substring(1).split('.').map(Number);
|
||||
|
||||
// Compare major version
|
||||
if (versionA[0] !== versionB[0]) return versionB[0] - versionA[0];
|
||||
// Compare minor version
|
||||
if (versionA[1] !== versionB[1]) return versionB[1] - versionA[1];
|
||||
// Compare patch version
|
||||
return versionB[2] - versionA[2];
|
||||
});
|
||||
|
||||
if (sortedMilestones.length === 0) {
|
||||
core.setFailed('No valid milestones found in the repository. Please create at least one milestone with the format vx.y.z');
|
||||
return;
|
||||
}
|
||||
|
||||
const highestMilestone = sortedMilestones[0];
|
||||
console.log(`Highest milestone: ${highestMilestone.title}`);
|
||||
|
||||
if (prMilestone.title !== highestMilestone.title) {
|
||||
core.setFailed(`PRs targeting the main branch must use the highest milestone (${highestMilestone.title}), but this PR uses ${prMilestone.title}`);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// For release branches, the milestone should match the branch version
|
||||
const branchVersion = baseBranch.substring(8); // remove 'release-'
|
||||
if (prMilestone.title !== branchVersion) {
|
||||
core.setFailed(`PRs targeting release branch "${baseBranch}" must use the matching milestone "${branchVersion}", but this PR uses "${prMilestone.title}"`);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
console.log('PR milestone validation passed!');
|
57
.github/workflows/db-change-comments.yml
vendored
Normal file
57
.github/workflows/db-change-comments.yml
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
name: Comment when db change labels are added
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
|
||||
env:
|
||||
MESSAGE: |
|
||||
### Hello, I'm a bot 🤖
|
||||
|
||||
You are receiving this message because you declared that this PR make changes to the Meilisearch database.
|
||||
Depending on the nature of the change, additional actions might be required on your part. The following sections detail the additional actions depending on the nature of the change, please copy the relevant section in the description of your PR, and make sure to perform the required actions.
|
||||
|
||||
Thank you for contributing to Meilisearch :heart:
|
||||
|
||||
## This PR makes forward-compatible changes
|
||||
|
||||
*Forward-compatible changes are changes to the database such that databases created in an older version of Meilisearch are still valid in the new version of Meilisearch. They usually represent additive changes, like adding a new optional attribute or setting.*
|
||||
|
||||
- [ ] Detail the change to the DB format and why they are forward compatible
|
||||
- [ ] Forward-compatibility: A database created before this PR and using the features touched by this PR was able to be opened by a Meilisearch produced by the code of this PR.
|
||||
|
||||
|
||||
## This PR makes breaking changes
|
||||
|
||||
*Breaking changes are changes to the database such that databases created in an older version of Meilisearch need changes to remain valid in the new version of Meilisearch. This typically happens when the way to store the data changed (change of database, new required key, etc). This can also happen due to breaking changes in the API of an experimental feature. ⚠️ This kind of changes are more difficult to achieve safely, so proceed with caution and test dumpless upgrade right before merging the PR.*
|
||||
|
||||
- [ ] Detail the changes to the DB format,
|
||||
- [ ] which are compatible, and why
|
||||
- [ ] which are not compatible, why, and how they will be fixed up in the upgrade
|
||||
- [ ] /!\ Ensure all the read operations still work!
|
||||
- If the change happened in milli, you may need to check the version of the database before doing any read operation
|
||||
- If the change happened in the index-scheduler, make sure the new code can immediately read the old database
|
||||
- If the change happened in the meilisearch-auth database, reach out to the team; we don't know yet how to handle these changes
|
||||
- [ ] Write the code to go from the old database to the new one
|
||||
- If the change happened in milli, the upgrade function should be written and called [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/milli/src/update/upgrade/mod.rs#L24-L47)
|
||||
- If the change happened in the index-scheduler, we've never done it yet, but the right place to do it should be [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/index-scheduler/src/scheduler/process_upgrade/mod.rs#L13)
|
||||
- [ ] Write an integration test [here](https://github.com/meilisearch/meilisearch/blob/main/crates/meilisearch/tests/upgrade/mod.rs) ensuring you can read the old database, upgrade to the new database, and read the new database as expected
|
||||
|
||||
|
||||
jobs:
|
||||
add-comment:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.label.name == 'db change'
|
||||
steps:
|
||||
- name: Add comment
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const message = process.env.MESSAGE;
|
||||
github.rest.issues.createComment({
|
||||
issue_number: context.issue.number,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body: message
|
||||
})
|
28
.github/workflows/db-change-missing.yml
vendored
Normal file
28
.github/workflows/db-change-missing.yml
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
name: Check db change labels
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened, labeled, unlabeled]
|
||||
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
|
||||
jobs:
|
||||
check-labels:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
- name: Check db change labels
|
||||
id: check_labels
|
||||
run: |
|
||||
URL=/repos/meilisearch/meilisearch/pulls/${{ github.event.pull_request.number }}/labels
|
||||
echo ${{ github.event.pull_request.number }}
|
||||
echo $URL
|
||||
LABELS=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" /repos/meilisearch/meilisearch/issues/${{ github.event.pull_request.number }}/labels -q .[].name)
|
||||
if [[ ! "$LABELS" =~ "db change" && ! "$LABELS" =~ "no db change" ]]; then
|
||||
echo "::error::Pull request must contain either the 'db change' or 'no db change' label."
|
||||
exit 1
|
||||
else
|
||||
echo "The label is set"
|
||||
fi
|
6
.github/workflows/flaky-tests.yml
vendored
6
.github/workflows/flaky-tests.yml
vendored
@ -9,15 +9,15 @@ jobs:
|
||||
flaky:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Install cargo-flaky
|
||||
run: cargo install cargo-flaky
|
||||
- name: Run cargo flaky in the dumps
|
||||
|
2
.github/workflows/fuzzer-indexing.yml
vendored
2
.github/workflows/fuzzer-indexing.yml
vendored
@ -12,7 +12,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
42
.github/workflows/milestone-workflow.yml
vendored
42
.github/workflows/milestone-workflow.yml
vendored
@ -5,6 +5,7 @@ name: Milestone's workflow
|
||||
# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed)
|
||||
# - the roadmap issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/roadmap-issue.md
|
||||
# - the changelog issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/changelog-issue.md
|
||||
# - update the ruleset to add the current release version to the list of allowed versions and be able to use the merge queue.
|
||||
|
||||
# For each Milestone closed
|
||||
# - the `release_version` label is created
|
||||
@ -21,10 +22,9 @@ env:
|
||||
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
|
||||
jobs:
|
||||
|
||||
# -----------------
|
||||
# MILESTONE CREATED
|
||||
# -----------------
|
||||
# -----------------
|
||||
# MILESTONE CREATED
|
||||
# -----------------
|
||||
|
||||
get-release-version:
|
||||
if: github.event.action == 'created'
|
||||
@ -148,9 +148,37 @@ jobs:
|
||||
--body-file $ISSUE_TEMPLATE \
|
||||
--milestone $MILESTONE_VERSION
|
||||
|
||||
# ----------------
|
||||
# MILESTONE CLOSED
|
||||
# ----------------
|
||||
update-ruleset:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install jq
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y jq
|
||||
- name: Update ruleset
|
||||
env:
|
||||
# gh api repos/meilisearch/meilisearch/rulesets --jq '.[] | {name: .name, id: .id}'
|
||||
RULESET_ID: 4253297
|
||||
BRANCH_NAME: ${{ github.event.inputs.branch_name }}
|
||||
run: |
|
||||
# Get current ruleset conditions
|
||||
CONDITIONS=$(gh api repos/meilisearch/meilisearch/rulesets/$RULESET_ID --jq '{ conditions: .conditions }')
|
||||
|
||||
# Update the conditions by appending the milestone version
|
||||
UPDATED_CONDITIONS=$(echo $CONDITIONS | jq '.conditions.ref_name.include += ["refs/heads/release-'$MILESTONE_VERSION'"]')
|
||||
|
||||
# Update the ruleset from stdin (-)
|
||||
echo $UPDATED_CONDITIONS |
|
||||
gh api repos/meilisearch/meilisearch/rulesets/$RULESET_ID \
|
||||
--method PUT \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
--input -
|
||||
|
||||
# ----------------
|
||||
# MILESTONE CLOSED
|
||||
# ----------------
|
||||
|
||||
create-release-label:
|
||||
if: github.event.action == 'closed'
|
||||
|
6
.github/workflows/publish-apt-brew-pkg.yml
vendored
6
.github/workflows/publish-apt-brew-pkg.yml
vendored
@ -18,14 +18,14 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Install cargo-deb
|
||||
run: cargo install cargo-deb
|
||||
- uses: actions/checkout@v3
|
||||
|
18
.github/workflows/publish-binaries.yml
vendored
18
.github/workflows/publish-binaries.yml
vendored
@ -3,7 +3,7 @@ name: Publish binaries to GitHub release
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '0 2 * * *' # Every day at 2:00am
|
||||
- cron: "0 2 * * *" # Every day at 2:00am
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
@ -37,15 +37,15 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
@ -75,7 +75,7 @@ jobs:
|
||||
asset_name: meilisearch-windows-amd64.exe
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
@ -101,7 +101,7 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
@ -127,8 +127,8 @@ jobs:
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
@ -148,7 +148,7 @@ jobs:
|
||||
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
|
||||
apt-get update -y && apt-get install -y docker-ce
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
|
2
.github/workflows/sdks-tests.yml
vendored
2
.github/workflows/sdks-tests.yml
vendored
@ -52,7 +52,7 @@ jobs:
|
||||
- name: Setup .NET Core
|
||||
uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: "6.0.x"
|
||||
dotnet-version: "8.0.x"
|
||||
- name: Install dependencies
|
||||
run: dotnet restore
|
||||
- name: Build
|
||||
|
86
.github/workflows/test-suite.yml
vendored
86
.github/workflows/test-suite.yml
vendored
@ -4,13 +4,9 @@ on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Everyday at 5:00am
|
||||
- cron: '0 5 * * *'
|
||||
- cron: "0 5 * * *"
|
||||
pull_request:
|
||||
push:
|
||||
# trying and staging branches are for Bors config
|
||||
branches:
|
||||
- trying
|
||||
- staging
|
||||
merge_group:
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@ -19,21 +15,21 @@ env:
|
||||
|
||||
jobs:
|
||||
test-linux:
|
||||
name: Tests on ubuntu-20.04
|
||||
name: Tests on ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Setup test with Rust stable
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -55,8 +51,8 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -72,8 +68,8 @@ jobs:
|
||||
name: Tests almost all features
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
@ -81,19 +77,51 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Run cargo build with almost all features
|
||||
run: |
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
- name: Run cargo test with almost all features
|
||||
run: |
|
||||
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
|
||||
ollama-ubuntu:
|
||||
name: Test with Ollama
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Install Ollama
|
||||
run: |
|
||||
curl -fsSL https://ollama.com/install.sh | sudo -E sh
|
||||
- name: Start serving
|
||||
run: |
|
||||
# Run it in the background, there is no way to daemonise at the moment
|
||||
ollama serve &
|
||||
|
||||
# A short pause is required before the HTTP port is opened
|
||||
sleep 5
|
||||
|
||||
# This endpoint blocks until ready
|
||||
time curl -i http://localhost:11434
|
||||
|
||||
- name: Pull nomic-embed-text & all-minilm
|
||||
run: |
|
||||
ollama pull nomic-embed-text
|
||||
ollama pull all-minilm
|
||||
|
||||
- name: Run cargo test
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all --features test-ollama ollama
|
||||
|
||||
test-disabled-tokenization:
|
||||
name: Test disabled tokenization
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ubuntu:20.04
|
||||
image: ubuntu:22.04
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
@ -101,7 +129,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Run cargo tree without default features and check lindera is not present
|
||||
run: |
|
||||
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
|
||||
@ -117,17 +145,17 @@ jobs:
|
||||
name: Run tests in debug
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -139,12 +167,12 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -156,14 +184,14 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: nightly-2024-07-09
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
@ -4,7 +4,7 @@ on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
new_version:
|
||||
description: 'The new version (vX.Y.Z)'
|
||||
description: "The new version (vX.Y.Z)"
|
||||
required: true
|
||||
|
||||
env:
|
||||
@ -18,7 +18,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
with:
|
||||
profile: minimal
|
||||
- name: Install sd
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -10,6 +10,7 @@
|
||||
/dumps
|
||||
/bench
|
||||
/_xtask_benchmark.ms
|
||||
/benchmarks
|
||||
|
||||
# Snapshots
|
||||
## ... large
|
||||
|
@ -95,6 +95,11 @@ Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) wo
|
||||
|
||||
Run `cargo xtask --help` from the root of the repository to find out what is available.
|
||||
|
||||
#### Update the openAPI file if the API changed
|
||||
|
||||
To update the openAPI file in the code, see [sprint_issue.md](https://github.com/meilisearch/meilisearch/blob/main/.github/ISSUE_TEMPLATE/sprint_issue.md#reminders-when-modifying-the-api).
|
||||
If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api), see [update-openapi-issue.md](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-openapi-issue.md).
|
||||
|
||||
### Logging
|
||||
|
||||
Meilisearch uses [`tracing`](https://lib.rs/crates/tracing) for logging purposes. Tracing logs are structured and can be displayed as JSON to the end user, so prefer passing arguments as fields rather than interpolating them in the message.
|
||||
@ -145,7 +150,7 @@ Some notes on GitHub PRs:
|
||||
- The PR title should be accurate and descriptive of the changes.
|
||||
- [Convert your PR as a draft](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request) if your changes are a work in progress: no one will review it until you pass your PR as ready for review.<br>
|
||||
The draft PRs are recommended when you want to show that you are working on something and make your work visible.
|
||||
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [Bors](https://github.com/bors-ng/bors-ng) to automatically enforce this requirement without the PR author having to rebase manually.
|
||||
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.
|
||||
|
||||
## Release Process (for internal team only)
|
||||
|
||||
@ -153,8 +158,7 @@ Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org
|
||||
|
||||
### Automation to rebase and Merge the PRs
|
||||
|
||||
This project integrates a bot that helps us manage pull requests merging.<br>
|
||||
_[Read more about this](https://github.com/meilisearch/integration-guides/blob/main/resources/bors.md)._
|
||||
This project uses GitHub Merge Queues that helps us manage pull requests merging.
|
||||
|
||||
### How to Publish a new Release
|
||||
|
||||
|
1514
Cargo.lock
generated
1514
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
11
Cargo.toml
11
Cargo.toml
@ -22,7 +22,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.12.0"
|
||||
version = "1.14.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
@ -36,6 +36,12 @@ license = "MIT"
|
||||
[profile.release]
|
||||
codegen-units = 1
|
||||
|
||||
# We now compile heed without the NDEBUG define for better performance.
|
||||
# However, we still enable debug assertions for a better detection of
|
||||
# disk corruption on the cloud or in OSS.
|
||||
[profile.release.package.heed]
|
||||
debug-assertions = true
|
||||
|
||||
[profile.dev.package.flate2]
|
||||
opt-level = 3
|
||||
|
||||
@ -43,6 +49,3 @@ opt-level = 3
|
||||
opt-level = 3
|
||||
[profile.dev.package.roaring]
|
||||
opt-level = 3
|
||||
|
||||
[patch.crates-io]
|
||||
roaring = { git = "https://github.com/RoaringBitmap/roaring-rs", branch = "clone-iter-slice" }
|
||||
|
@ -1,5 +1,5 @@
|
||||
# Compile
|
||||
FROM rust:1.79.0-alpine3.20 AS compiler
|
||||
FROM rust:1.85-alpine3.20 AS compiler
|
||||
|
||||
RUN apk add -q --no-cache build-base openssl-dev
|
||||
|
||||
|
2
LICENSE
2
LICENSE
@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019-2024 Meili SAS
|
||||
Copyright (c) 2019-2025 Meili SAS
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
@ -20,7 +20,13 @@
|
||||
<p align="center">
|
||||
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
|
||||
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
|
||||
<a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
|
||||
<a href="https://github.com/meilisearch/meilisearch/queue"><img alt="Merge Queues enabled" src="https://img.shields.io/badge/Merge_Queues-enabled-%2357cf60?logo=github"></a>
|
||||
</p>
|
||||
|
||||
<p align="center" name="ph-banner">
|
||||
<a href="https://www.producthunt.com/posts/meilisearch-ai">
|
||||
<img src="assets/ph-banner.png" alt="Meilisearch AI-powered search general availability announcement on ProductHunt">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
|
||||
@ -58,6 +64,7 @@ See the list of all our example apps in our [demos repository](https://github.co
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
|
||||
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **AI-ready:** works out of the box with [langchain](https://www.meilisearch.com/with/langchain) and the [model context protocol](https://github.com/meilisearch/meilisearch-mcp)
|
||||
- **Easy to install, deploy, and maintain**
|
||||
|
||||
## 📖 Documentation
|
||||
|
@ -1403,6 +1403,104 @@
|
||||
"title": "Number of tasks by indexes",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 15,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 51
|
||||
},
|
||||
"id": 29,
|
||||
"interval": "5s",
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.1.4",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "meilisearch_task_queue_latency_seconds{instance=\"$instance\", job=\"$job\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "{{value}} ",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Task queue latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"datasource": {
|
||||
|
BIN
assets/ph-banner.png
Normal file
BIN
assets/ph-banner.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 578 KiB |
11
bors.toml
11
bors.toml
@ -1,11 +0,0 @@
|
||||
status = [
|
||||
'Tests on ubuntu-20.04',
|
||||
'Tests on macos-13',
|
||||
'Tests on windows-2022',
|
||||
'Run Clippy',
|
||||
'Run Rustfmt',
|
||||
'Run tests in debug',
|
||||
]
|
||||
pr_status = ['Milestone Check']
|
||||
# 3 hours timeout
|
||||
timeout-sec = 10800
|
@ -11,27 +11,27 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
anyhow = "1.0.95"
|
||||
bumpalo = "3.16.0"
|
||||
csv = "1.3.0"
|
||||
csv = "1.3.1"
|
||||
memmap2 = "0.9.5"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.43", default-features = false }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tempfile = "3.14.0"
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.10.6"
|
||||
roaring = "0.10.10"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.86"
|
||||
bytes = "1.6.0"
|
||||
anyhow = "1.0.95"
|
||||
bytes = "1.9.0"
|
||||
convert_case = "0.6.0"
|
||||
flate2 = "1.0.30"
|
||||
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
flate2 = "1.0.35"
|
||||
reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/all-tokenizations"]
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,9 +2,10 @@ mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use milli::{update::Settings, FilterableAttributesRule};
|
||||
use utils::Conf;
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
@ -20,8 +21,10 @@ fn base_conf(builder: &mut Settings) {
|
||||
["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let filterable_fields =
|
||||
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
let filterable_fields = ["_geo", "population", "elevation"]
|
||||
.iter()
|
||||
.map(|s| FilterableAttributesRule::Field(s.to_string()))
|
||||
.collect();
|
||||
builder.set_filterable_fields(filterable_fields);
|
||||
|
||||
let sortable_fields =
|
||||
|
@ -2,9 +2,10 @@ mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use milli::{update::Settings, FilterableAttributesRule};
|
||||
use utils::Conf;
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
@ -21,7 +22,7 @@ fn base_conf(builder: &mut Settings) {
|
||||
|
||||
let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.map(|s| FilterableAttributesRule::Field(s.to_string()))
|
||||
.collect();
|
||||
builder.set_filterable_fields(faceted_fields);
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
|
@ -10,8 +10,9 @@ use bumpalo::Bump;
|
||||
use criterion::BenchmarkId;
|
||||
use memmap2::Mmap;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||
use milli::update::{IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
|
||||
use serde_json::Value;
|
||||
@ -64,10 +65,11 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
}
|
||||
create_dir_all(conf.database_name).unwrap();
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(100);
|
||||
let index = Index::new(options, conf.database_name).unwrap();
|
||||
let index = Index::new(options, conf.database_name, true).unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
@ -98,8 +100,8 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let documents = documents_from(conf.dataset, conf.dataset_format);
|
||||
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@ -110,13 +112,14 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&|_progress| (),
|
||||
Progress::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
indexer::index(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
|
||||
config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
@ -124,7 +127,7 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
&|| false,
|
||||
&|_| (),
|
||||
&Progress::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
@ -11,8 +11,8 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
time = { version = "0.3.36", features = ["parsing"] }
|
||||
time = { version = "0.3.37", features = ["parsing"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.86"
|
||||
vergen-git2 = "1.0.0"
|
||||
anyhow = "1.0.95"
|
||||
vergen-git2 = "1.0.2"
|
||||
|
@ -11,21 +11,21 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
flate2 = "1.0.30"
|
||||
http = "1.1.0"
|
||||
anyhow = "1.0.95"
|
||||
flate2 = "1.0.35"
|
||||
http = "1.2.0"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.19.0"
|
||||
regex = "1.10.5"
|
||||
roaring = { version = "0.10.6", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tar = "0.4.41"
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
once_cell = "1.20.2"
|
||||
regex = "1.11.1"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
@ -10,8 +10,10 @@ dump
|
||||
├── instance-uid.uuid
|
||||
├── keys.jsonl
|
||||
├── metadata.json
|
||||
└── tasks
|
||||
├── update_files
|
||||
│ └── [task_id].jsonl
|
||||
├── tasks
|
||||
│ ├── update_files
|
||||
│ │ └── [task_id].jsonl
|
||||
│ └── queue.jsonl
|
||||
└── batches
|
||||
└── queue.jsonl
|
||||
```
|
@ -141,6 +141,9 @@ pub enum KindDump {
|
||||
instance_uid: Option<InstanceUid>,
|
||||
},
|
||||
SnapshotCreation,
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
}
|
||||
|
||||
impl From<Task> for TaskDump {
|
||||
@ -210,6 +213,9 @@ impl From<KindWithContent> for KindDump {
|
||||
KindDump::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
|
||||
KindWithContent::UpgradeDatabase { from: version } => {
|
||||
KindDump::UpgradeDatabase { from: version }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -222,14 +228,16 @@ pub(crate) mod test {
|
||||
|
||||
use big_s::S;
|
||||
use maplit::{btreemap, btreeset};
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::{Action, Key};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::{self, FilterableAttributesRule};
|
||||
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
|
||||
use meilisearch_types::tasks::{Details, Status};
|
||||
use meilisearch_types::task_view::DetailsView;
|
||||
use meilisearch_types::tasks::{Details, Kind, Status};
|
||||
use serde_json::{json, Map, Value};
|
||||
use time::macros::datetime;
|
||||
use uuid::Uuid;
|
||||
@ -271,7 +279,10 @@ pub(crate) mod test {
|
||||
let settings = Settings {
|
||||
displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
|
||||
searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
|
||||
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
|
||||
filterable_attributes: Setting::Set(vec![
|
||||
FilterableAttributesRule::Field(S("race")),
|
||||
FilterableAttributesRule::Field(S("age")),
|
||||
]),
|
||||
sortable_attributes: Setting::Set(btreeset! { S("age") }),
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
@ -292,11 +303,39 @@ pub(crate) mod test {
|
||||
embedders: Setting::NotSet,
|
||||
search_cutoff_ms: Setting::NotSet,
|
||||
localized_attributes: Setting::NotSet,
|
||||
facet_search: Setting::NotSet,
|
||||
prefix_search: Setting::NotSet,
|
||||
_kind: std::marker::PhantomData,
|
||||
};
|
||||
settings.check()
|
||||
}
|
||||
|
||||
pub fn create_test_batches() -> Vec<Batch> {
|
||||
vec![Batch {
|
||||
uid: 0,
|
||||
details: DetailsView {
|
||||
received_documents: Some(12),
|
||||
indexed_documents: Some(Some(10)),
|
||||
..DetailsView::default()
|
||||
},
|
||||
progress: None,
|
||||
stats: BatchStats {
|
||||
total_nb_tasks: 1,
|
||||
status: maplit::btreemap! { Status::Succeeded => 1 },
|
||||
types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
|
||||
index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
|
||||
progress_trace: Default::default(),
|
||||
write_channel_congestion: None,
|
||||
},
|
||||
enqueued_at: Some(BatchEnqueuedAt {
|
||||
earliest: datetime!(2022-11-11 0:00 UTC),
|
||||
oldest: datetime!(2022-11-11 0:00 UTC),
|
||||
}),
|
||||
started_at: datetime!(2022-11-20 0:00 UTC),
|
||||
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
|
||||
}]
|
||||
}
|
||||
|
||||
pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
|
||||
vec![
|
||||
(
|
||||
@ -419,6 +458,15 @@ pub(crate) mod test {
|
||||
index.flush().unwrap();
|
||||
index.settings(&settings).unwrap();
|
||||
|
||||
// ========== pushing the batch queue
|
||||
let batches = create_test_batches();
|
||||
|
||||
let mut batch_queue = dump.create_batches_queue().unwrap();
|
||||
for batch in &batches {
|
||||
batch_queue.push_batch(batch).unwrap();
|
||||
}
|
||||
batch_queue.flush().unwrap();
|
||||
|
||||
// ========== pushing the task queue
|
||||
let tasks = create_test_tasks();
|
||||
|
||||
@ -447,6 +495,10 @@ pub(crate) mod test {
|
||||
|
||||
dump.create_experimental_features(features).unwrap();
|
||||
|
||||
// ========== network
|
||||
let network = create_test_network();
|
||||
dump.create_network(network).unwrap();
|
||||
|
||||
// create the dump
|
||||
let mut file = tempfile::tempfile().unwrap();
|
||||
dump.persist_to(&mut file).unwrap();
|
||||
@ -456,7 +508,14 @@ pub(crate) mod test {
|
||||
}
|
||||
|
||||
fn create_test_features() -> RuntimeTogglableFeatures {
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
RuntimeTogglableFeatures::default()
|
||||
}
|
||||
|
||||
fn create_test_network() -> Network {
|
||||
Network {
|
||||
local: Some("myself".to_string()),
|
||||
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -507,5 +566,9 @@ pub(crate) mod test {
|
||||
// ==== checking the features
|
||||
let expected = create_test_features();
|
||||
assert_eq!(dump.features().unwrap().unwrap(), expected);
|
||||
|
||||
// ==== checking the network
|
||||
let expected = create_test_network();
|
||||
assert_eq!(&expected, dump.network().unwrap().unwrap());
|
||||
}
|
||||
}
|
||||
|
@ -196,6 +196,10 @@ impl CompatV5ToV6 {
|
||||
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Result<Option<&v6::Network>> {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub enum CompatIndexV5ToV6 {
|
||||
@ -318,7 +322,16 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
v6::Settings {
|
||||
displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
|
||||
searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
|
||||
filterable_attributes: settings.filterable_attributes.into(),
|
||||
filterable_attributes: match settings.filterable_attributes {
|
||||
v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set(
|
||||
filterable_attributes
|
||||
.into_iter()
|
||||
.map(v6::FilterableAttributesRule::Field)
|
||||
.collect(),
|
||||
),
|
||||
v5::settings::Setting::Reset => v6::Setting::Reset,
|
||||
v5::settings::Setting::NotSet => v6::Setting::NotSet,
|
||||
},
|
||||
sortable_attributes: settings.sortable_attributes.into(),
|
||||
ranking_rules: {
|
||||
match settings.ranking_rules {
|
||||
@ -382,6 +395,8 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
embedders: v6::Setting::NotSet,
|
||||
localized_attributes: v6::Setting::NotSet,
|
||||
search_cutoff_ms: v6::Setting::NotSet,
|
||||
facet_search: v6::Setting::NotSet,
|
||||
prefix_search: v6::Setting::NotSet,
|
||||
_kind: std::marker::PhantomData,
|
||||
}
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ mod v6;
|
||||
pub type Document = serde_json::Map<String, serde_json::Value>;
|
||||
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum DumpReader {
|
||||
Current(V6Reader),
|
||||
Compat(CompatV5ToV6),
|
||||
@ -101,6 +102,13 @@ impl DumpReader {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.batches()),
|
||||
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.keys()),
|
||||
@ -114,6 +122,13 @@ impl DumpReader {
|
||||
DumpReader::Compat(compat) => compat.features(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Result<Option<&v6::Network>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.network()),
|
||||
DumpReader::Compat(compat) => compat.network(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<V6Reader> for DumpReader {
|
||||
@ -219,6 +234,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -327,10 +346,8 @@ pub(crate) mod test {
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
dump.features().unwrap().unwrap(),
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
);
|
||||
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
|
||||
assert_eq!(dump.network().unwrap(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -342,6 +359,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -373,10 +394,28 @@ pub(crate) mod test {
|
||||
|
||||
assert_eq!(test.documents().unwrap().count(), 1);
|
||||
|
||||
assert_eq!(
|
||||
dump.features().unwrap().unwrap(),
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
);
|
||||
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn import_dump_v6_network() {
|
||||
let dump = File::open("tests/assets/v6-with-network.dump").unwrap();
|
||||
let dump = DumpReader::open(dump).unwrap();
|
||||
|
||||
// top level infos
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// network
|
||||
|
||||
let network = dump.network().unwrap().unwrap();
|
||||
insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().search_api_key.is_none(), @"true");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().url, @"http://ms-5679.example.meilisearch.io");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -388,6 +427,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
|
||||
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -468,6 +511,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
|
||||
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -545,6 +592,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -638,6 +689,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -731,6 +786,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -807,6 +866,10 @@ pub(crate) mod test {
|
||||
assert_eq!(dump.date(), None);
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
source: crates/dump/src/reader/mod.rs
|
||||
expression: vector_index.settings().unwrap()
|
||||
---
|
||||
{
|
||||
@ -49,6 +49,7 @@ expression: vector_index.settings().unwrap()
|
||||
"source": "huggingFace",
|
||||
"model": "BAAI/bge-base-en-v1.5",
|
||||
"revision": "617ca489d9e86b49b8167676d8220688b99db36e",
|
||||
"pooling": "forceMean",
|
||||
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
|
||||
}
|
||||
},
|
||||
|
@ -108,7 +108,7 @@ where
|
||||
/// not supported on untagged enums.
|
||||
struct StarOrVisitor<T>(PhantomData<T>);
|
||||
|
||||
impl<'de, T, FE> Visitor<'de> for StarOrVisitor<T>
|
||||
impl<T, FE> Visitor<'_> for StarOrVisitor<T>
|
||||
where
|
||||
T: FromStr<Err = FE>,
|
||||
FE: Display,
|
||||
|
@ -99,7 +99,7 @@ impl Task {
|
||||
/// Return true when a task is finished.
|
||||
/// A task is finished when its last state is either `Succeeded` or `Failed`.
|
||||
pub fn is_finished(&self) -> bool {
|
||||
self.events.last().map_or(false, |event| {
|
||||
self.events.last().is_some_and(|event| {
|
||||
matches!(event, TaskEvent::Succeded { .. } | TaskEvent::Failed { .. })
|
||||
})
|
||||
}
|
||||
|
@ -108,7 +108,7 @@ where
|
||||
/// not supported on untagged enums.
|
||||
struct StarOrVisitor<T>(PhantomData<T>);
|
||||
|
||||
impl<'de, T, FE> Visitor<'de> for StarOrVisitor<T>
|
||||
impl<T, FE> Visitor<'_> for StarOrVisitor<T>
|
||||
where
|
||||
T: FromStr<Err = FE>,
|
||||
FE: Display,
|
||||
|
@ -114,7 +114,7 @@ impl Task {
|
||||
/// Return true when a task is finished.
|
||||
/// A task is finished when its last state is either `Succeeded` or `Failed`.
|
||||
pub fn is_finished(&self) -> bool {
|
||||
self.events.last().map_or(false, |event| {
|
||||
self.events.last().is_some_and(|event| {
|
||||
matches!(event, TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. })
|
||||
})
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ use std::io::{BufRead, BufReader, ErrorKind};
|
||||
use std::path::Path;
|
||||
|
||||
pub use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::vector::hf::OverridePooling;
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
@ -18,8 +19,10 @@ pub type Checked = meilisearch_types::settings::Checked;
|
||||
pub type Unchecked = meilisearch_types::settings::Unchecked;
|
||||
|
||||
pub type Task = crate::TaskDump;
|
||||
pub type Batch = meilisearch_types::batches::Batch;
|
||||
pub type Key = meilisearch_types::keys::Key;
|
||||
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
pub type Network = meilisearch_types::features::Network;
|
||||
|
||||
// ===== Other types to clarify the code of the compat module
|
||||
// everything related to the tasks
|
||||
@ -43,13 +46,17 @@ pub type ResponseError = meilisearch_types::error::ResponseError;
|
||||
pub type Code = meilisearch_types::error::Code;
|
||||
pub type RankingRuleView = meilisearch_types::settings::RankingRuleView;
|
||||
|
||||
pub type FilterableAttributesRule = meilisearch_types::milli::FilterableAttributesRule;
|
||||
|
||||
pub struct V6Reader {
|
||||
dump: TempDir,
|
||||
instance_uid: Option<Uuid>,
|
||||
metadata: Metadata,
|
||||
tasks: BufReader<File>,
|
||||
batches: Option<BufReader<File>>,
|
||||
keys: BufReader<File>,
|
||||
features: Option<RuntimeTogglableFeatures>,
|
||||
network: Option<Network>,
|
||||
}
|
||||
|
||||
impl V6Reader {
|
||||
@ -77,13 +84,38 @@ impl V6Reader {
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let batches = match File::open(dump.path().join("batches").join("queue.jsonl")) {
|
||||
Ok(file) => Some(BufReader::new(file)),
|
||||
// The batch file was only introduced during the v1.13, anything prior to that won't have batches
|
||||
Err(err) if err.kind() == ErrorKind::NotFound => None,
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
let network_file = match fs::read(dump.path().join("network.json")) {
|
||||
Ok(network_file) => Some(network_file),
|
||||
Err(error) => match error.kind() {
|
||||
// Allows the file to be missing, this will only result in all experimental features disabled.
|
||||
ErrorKind::NotFound => {
|
||||
debug!("`network.json` not found in dump");
|
||||
None
|
||||
}
|
||||
_ => return Err(error.into()),
|
||||
},
|
||||
};
|
||||
let network = if let Some(network_file) = network_file {
|
||||
Some(serde_json::from_reader(&*network_file)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(V6Reader {
|
||||
metadata: serde_json::from_reader(&*meta_file)?,
|
||||
instance_uid,
|
||||
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
|
||||
batches,
|
||||
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
|
||||
features,
|
||||
network,
|
||||
dump,
|
||||
})
|
||||
}
|
||||
@ -124,7 +156,7 @@ impl V6Reader {
|
||||
&mut self,
|
||||
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
|
||||
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
|
||||
let task: Task = serde_json::from_str(&line?).unwrap();
|
||||
let task: Task = serde_json::from_str(&line?)?;
|
||||
|
||||
let update_file_path = self
|
||||
.dump
|
||||
@ -136,8 +168,7 @@ impl V6Reader {
|
||||
if update_file_path.exists() {
|
||||
Ok((
|
||||
task,
|
||||
Some(Box::new(UpdateFile::new(&update_file_path).unwrap())
|
||||
as Box<super::UpdateFile>),
|
||||
Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
|
||||
))
|
||||
} else {
|
||||
Ok((task, None))
|
||||
@ -145,6 +176,16 @@ impl V6Reader {
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn batches(&mut self) -> Box<dyn Iterator<Item = Result<Batch>> + '_> {
|
||||
match self.batches.as_mut() {
|
||||
Some(batches) => Box::new((batches).lines().map(|line| -> Result<_> {
|
||||
let batch = serde_json::from_str(&line?)?;
|
||||
Ok(batch)
|
||||
})),
|
||||
None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = Result<Batch>> + '_>,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
|
||||
Box::new(
|
||||
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
|
||||
@ -154,6 +195,10 @@ impl V6Reader {
|
||||
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
|
||||
self.features
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Option<&Network> {
|
||||
self.network.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UpdateFile {
|
||||
@ -210,7 +255,29 @@ impl V6IndexReader {
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<Settings<Checked>> {
|
||||
let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
|
||||
let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
|
||||
patch_embedders(&mut settings);
|
||||
Ok(settings.check())
|
||||
}
|
||||
}
|
||||
|
||||
fn patch_embedders(settings: &mut Settings<Unchecked>) {
|
||||
if let Setting::Set(embedders) = &mut settings.embedders {
|
||||
for settings in embedders.values_mut() {
|
||||
let Setting::Set(settings) = &mut settings.inner else {
|
||||
continue;
|
||||
};
|
||||
if settings.source != Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
settings.pooling = match settings.pooling {
|
||||
Setting::Set(pooling) => Setting::Set(pooling),
|
||||
// if the pooling for a hugging face embedder is not set, force it to `forceMean`
|
||||
// for backward compatibility with v1.13
|
||||
// dumps created in v1.14 and up will have the setting set for hugging face embedders
|
||||
Setting::Reset | Setting::NotSet => Setting::Set(OverridePooling::ForceMean),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4,7 +4,8 @@ use std::path::PathBuf;
|
||||
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::settings::{Checked, Settings};
|
||||
use serde_json::{Map, Value};
|
||||
@ -54,6 +55,10 @@ impl DumpWriter {
|
||||
TaskWriter::new(self.dir.path().join("tasks"))
|
||||
}
|
||||
|
||||
pub fn create_batches_queue(&self) -> Result<BatchWriter> {
|
||||
BatchWriter::new(self.dir.path().join("batches"))
|
||||
}
|
||||
|
||||
pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
|
||||
Ok(std::fs::write(
|
||||
self.dir.path().join("experimental-features.json"),
|
||||
@ -61,6 +66,10 @@ impl DumpWriter {
|
||||
)?)
|
||||
}
|
||||
|
||||
pub fn create_network(&self, network: Network) -> Result<()> {
|
||||
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
|
||||
}
|
||||
|
||||
pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
|
||||
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
|
||||
let mut tar_encoder = tar::Builder::new(gz_encoder);
|
||||
@ -84,7 +93,7 @@ impl KeyWriter {
|
||||
}
|
||||
|
||||
pub fn push_key(&mut self, key: &Key) -> Result<()> {
|
||||
self.keys.write_all(&serde_json::to_vec(key)?)?;
|
||||
serde_json::to_writer(&mut self.keys, &key)?;
|
||||
self.keys.write_all(b"\n")?;
|
||||
Ok(())
|
||||
}
|
||||
@ -114,7 +123,7 @@ impl TaskWriter {
|
||||
/// Pushes tasks in the dump.
|
||||
/// If the tasks has an associated `update_file` it'll use the `task_id` as its name.
|
||||
pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> {
|
||||
self.queue.write_all(&serde_json::to_vec(task)?)?;
|
||||
serde_json::to_writer(&mut self.queue, &task)?;
|
||||
self.queue.write_all(b"\n")?;
|
||||
|
||||
Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid))))
|
||||
@ -126,6 +135,30 @@ impl TaskWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BatchWriter {
|
||||
queue: BufWriter<File>,
|
||||
}
|
||||
|
||||
impl BatchWriter {
|
||||
pub(crate) fn new(path: PathBuf) -> Result<Self> {
|
||||
std::fs::create_dir(&path)?;
|
||||
let queue = File::create(path.join("queue.jsonl"))?;
|
||||
Ok(BatchWriter { queue: BufWriter::new(queue) })
|
||||
}
|
||||
|
||||
/// Pushes batches in the dump.
|
||||
pub fn push_batch(&mut self, batch: &Batch) -> Result<()> {
|
||||
serde_json::to_writer(&mut self.queue, &batch)?;
|
||||
self.queue.write_all(b"\n")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn flush(mut self) -> Result<()> {
|
||||
self.queue.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UpdateFile {
|
||||
path: PathBuf,
|
||||
writer: Option<BufWriter<File>>,
|
||||
@ -137,8 +170,8 @@ impl UpdateFile {
|
||||
}
|
||||
|
||||
pub fn push_document(&mut self, document: &Document) -> Result<()> {
|
||||
if let Some(writer) = self.writer.as_mut() {
|
||||
writer.write_all(&serde_json::to_vec(document)?)?;
|
||||
if let Some(mut writer) = self.writer.as_mut() {
|
||||
serde_json::to_writer(&mut writer, &document)?;
|
||||
writer.write_all(b"\n")?;
|
||||
} else {
|
||||
let file = File::create(&self.path).unwrap();
|
||||
@ -205,8 +238,8 @@ pub(crate) mod test {
|
||||
use super::*;
|
||||
use crate::reader::Document;
|
||||
use crate::test::{
|
||||
create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid,
|
||||
create_test_settings, create_test_tasks,
|
||||
create_test_api_keys, create_test_batches, create_test_documents, create_test_dump,
|
||||
create_test_instance_uid, create_test_settings, create_test_tasks,
|
||||
};
|
||||
|
||||
fn create_directory_hierarchy(dir: &Path) -> String {
|
||||
@ -281,8 +314,10 @@ pub(crate) mod test {
|
||||
let dump_path = dump.path();
|
||||
|
||||
// ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
|
||||
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
|
||||
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r"
|
||||
.
|
||||
├---- batches/
|
||||
│ └---- queue.jsonl
|
||||
├---- indexes/
|
||||
│ └---- doggos/
|
||||
│ │ ├---- documents.jsonl
|
||||
@ -295,8 +330,9 @@ pub(crate) mod test {
|
||||
├---- experimental-features.json
|
||||
├---- instance_uid.uuid
|
||||
├---- keys.jsonl
|
||||
└---- metadata.json
|
||||
"###);
|
||||
├---- metadata.json
|
||||
└---- network.json
|
||||
");
|
||||
|
||||
// ==== checking the top level infos
|
||||
let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap();
|
||||
@ -349,6 +385,16 @@ pub(crate) mod test {
|
||||
}
|
||||
}
|
||||
|
||||
// ==== checking the batch queue
|
||||
let batches_queue = fs::read_to_string(dump_path.join("batches/queue.jsonl")).unwrap();
|
||||
for (batch, expected) in batches_queue.lines().zip(create_test_batches()) {
|
||||
let mut batch = serde_json::from_str::<Batch>(batch).unwrap();
|
||||
if batch.details.settings == Some(Box::new(Settings::<Unchecked>::default())) {
|
||||
batch.details.settings = None;
|
||||
}
|
||||
assert_eq!(batch, expected, "{batch:#?}{expected:#?}");
|
||||
}
|
||||
|
||||
// ==== checking the keys
|
||||
let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap();
|
||||
for (key, expected) in keys.lines().zip(create_test_api_keys()) {
|
||||
|
BIN
crates/dump/tests/assets/v6-with-network.dump
Normal file
BIN
crates/dump/tests/assets/v6-with-network.dump
Normal file
Binary file not shown.
@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
|
@ -136,6 +136,14 @@ pub struct File {
|
||||
}
|
||||
|
||||
impl File {
|
||||
pub fn from_parts(path: PathBuf, file: Option<NamedTempFile>) -> Self {
|
||||
Self { path, file }
|
||||
}
|
||||
|
||||
pub fn into_parts(self) -> (PathBuf, Option<NamedTempFile>) {
|
||||
(self.path, self.file)
|
||||
}
|
||||
|
||||
pub fn dry_file() -> Result<Self> {
|
||||
Ok(Self { path: PathBuf::new(), file: None })
|
||||
}
|
||||
|
@ -17,4 +17,5 @@ nom_locate = "4.2.0"
|
||||
unescaper = "0.1.5"
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.39.0"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = "=1.39.0"
|
||||
|
@ -30,6 +30,25 @@ pub enum Condition<'a> {
|
||||
StartsWith { keyword: Token<'a>, word: Token<'a> },
|
||||
}
|
||||
|
||||
impl Condition<'_> {
|
||||
pub fn operator(&self) -> &str {
|
||||
match self {
|
||||
Condition::GreaterThan(_) => ">",
|
||||
Condition::GreaterThanOrEqual(_) => ">=",
|
||||
Condition::Equal(_) => "=",
|
||||
Condition::NotEqual(_) => "!=",
|
||||
Condition::Null => "IS NULL",
|
||||
Condition::Empty => "IS EMPTY",
|
||||
Condition::Exists => "EXISTS",
|
||||
Condition::LowerThan(_) => "<",
|
||||
Condition::LowerThanOrEqual(_) => "<=",
|
||||
Condition::Between { .. } => "TO",
|
||||
Condition::Contains { .. } => "CONTAINS",
|
||||
Condition::StartsWith { .. } => "STARTS WITH",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// condition = value ("==" | ">" ...) value
|
||||
pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
|
||||
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
|
||||
|
@ -35,7 +35,7 @@ impl<E> NomErrorExt<E> for nom::Err<E> {
|
||||
pub fn cut_with_err<'a, O>(
|
||||
mut parser: impl FnMut(Span<'a>) -> IResult<'a, O>,
|
||||
mut with: impl FnMut(Error<'a>) -> Error<'a>,
|
||||
) -> impl FnMut(Span<'a>) -> IResult<O> {
|
||||
) -> impl FnMut(Span<'a>) -> IResult<'a, O> {
|
||||
move |input| match parser.parse(input) {
|
||||
Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))),
|
||||
rest => rest,
|
||||
@ -121,7 +121,7 @@ impl<'a> ParseError<Span<'a>> for Error<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Display for Error<'a> {
|
||||
impl Display for Error<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let input = self.context.fragment();
|
||||
// When printing our error message we want to escape all `\n` to be sure we keep our format with the
|
||||
|
@ -80,7 +80,7 @@ pub struct Token<'a> {
|
||||
value: Option<String>,
|
||||
}
|
||||
|
||||
impl<'a> PartialEq for Token<'a> {
|
||||
impl PartialEq for Token<'_> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.span.fragment() == other.span.fragment()
|
||||
}
|
||||
@ -179,6 +179,26 @@ impl<'a> FilterCondition<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token> + '_> {
|
||||
if depth == 0 {
|
||||
return Box::new(std::iter::empty());
|
||||
}
|
||||
match self {
|
||||
FilterCondition::Condition { fid, .. } | FilterCondition::In { fid, .. } => {
|
||||
Box::new(std::iter::once(fid))
|
||||
}
|
||||
FilterCondition::Not(filter) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
filter.fids(depth)
|
||||
}
|
||||
FilterCondition::And(subfilters) | FilterCondition::Or(subfilters) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
Box::new(subfilters.iter().flat_map(move |f| f.fids(depth)))
|
||||
}
|
||||
_ => Box::new(std::iter::empty()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the first token found at the specified depth, `None` if no token at this depth.
|
||||
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
|
||||
match self {
|
||||
@ -206,7 +226,7 @@ impl<'a> FilterCondition<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(input: &'a str) -> Result<Option<Self>, Error> {
|
||||
pub fn parse(input: &'a str) -> Result<Option<Self>, Error<'a>> {
|
||||
if input.trim().is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
@ -507,7 +527,7 @@ pub fn parse_filter(input: Span) -> IResult<FilterCondition> {
|
||||
terminated(|input| parse_expression(input, 0), eof)(input)
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Display for FilterCondition<'a> {
|
||||
impl std::fmt::Display for FilterCondition<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
FilterCondition::Not(filter) => {
|
||||
@ -556,7 +576,8 @@ impl<'a> std::fmt::Display for FilterCondition<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> std::fmt::Display for Condition<'a> {
|
||||
|
||||
impl std::fmt::Display for Condition<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Condition::GreaterThan(token) => write!(f, "> {token}"),
|
||||
@ -574,7 +595,8 @@ impl<'a> std::fmt::Display for Condition<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> std::fmt::Display for Token<'a> {
|
||||
|
||||
impl std::fmt::Display for Token<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{{{}}}", self.value())
|
||||
}
|
||||
@ -978,6 +1000,43 @@ pub mod tests {
|
||||
assert!(filter.token_at_depth(3).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fids() {
|
||||
let filter = Fc::parse("field = value").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field IN [1, 2, 3]").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field != value").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field1 = value1 AND field2 = value2").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 2);
|
||||
assert!(fids[0].value() == "field1");
|
||||
assert!(fids[1].value() == "field2");
|
||||
|
||||
let filter = Fc::parse("field1 = value1 OR field2 = value2").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 2);
|
||||
assert!(fids[0].value() == "field1");
|
||||
assert!(fids[1].value() == "field2");
|
||||
|
||||
let depth = 2;
|
||||
let filter =
|
||||
Fc::parse("field1 = value1 AND (field2 = value2 OR field3 = value3)").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(depth).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_from_str() {
|
||||
let s = "test string that should not be parsed";
|
||||
|
@ -52,7 +52,7 @@ fn quoted_by(quote: char, input: Span) -> IResult<Token> {
|
||||
}
|
||||
|
||||
// word = (alphanumeric | _ | - | .)+ except for reserved keywords
|
||||
pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult<Token<'a>> {
|
||||
pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult<'a, Token<'a>> {
|
||||
let (input, word): (_, Token<'a>) =
|
||||
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
|
||||
if is_keyword(word.value()) {
|
||||
|
@ -11,12 +11,12 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.3.2", features = ["derive"] }
|
||||
arbitrary = { version = "1.4.1", features = ["derive"] }
|
||||
bumpalo = "3.16.0"
|
||||
clap = { version = "4.5.9", features = ["derive"] }
|
||||
clap = { version = "4.5.24", features = ["derive"] }
|
||||
either = "1.13.0"
|
||||
fastrand = "2.1.0"
|
||||
fastrand = "2.3.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tempfile = "3.10.1"
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
|
@ -10,8 +10,9 @@ use either::Either;
|
||||
use fuzzers::Operation;
|
||||
use milli::documents::mmap_from_objects;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig};
|
||||
use milli::update::IndexerConfig;
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::Index;
|
||||
use serde_json::Value;
|
||||
@ -56,13 +57,14 @@ fn main() {
|
||||
let opt = opt.clone();
|
||||
|
||||
let handle = std::thread::spawn(move || {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
options.map_size(1024 * 1024 * 1024 * 1024);
|
||||
let tempdir = match opt.path {
|
||||
Some(path) => TempDir::new_in(path).unwrap(),
|
||||
None => TempDir::new().unwrap(),
|
||||
};
|
||||
let index = Index::new(options, tempdir.path()).unwrap();
|
||||
let index = Index::new(options, tempdir.path(), true).unwrap();
|
||||
let indexer_config = IndexerConfig::default();
|
||||
|
||||
std::thread::scope(|s| {
|
||||
@ -88,9 +90,7 @@ fn main() {
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let mut indexer = indexer::DocumentOperation::new(
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
|
||||
let mut operations = Vec::new();
|
||||
for op in batch.0 {
|
||||
@ -114,7 +114,7 @@ fn main() {
|
||||
for op in &operations {
|
||||
match op {
|
||||
Either::Left(documents) => {
|
||||
indexer.add_documents(documents).unwrap()
|
||||
indexer.replace_documents(documents).unwrap()
|
||||
}
|
||||
Either::Right(ids) => indexer.delete_documents(ids),
|
||||
}
|
||||
@ -128,13 +128,14 @@ fn main() {
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&|_progress| (),
|
||||
Progress::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
indexer::index(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
@ -142,7 +143,7 @@ fn main() {
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| false,
|
||||
&|_| (),
|
||||
&Progress::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
@ -11,41 +11,42 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
anyhow = "1.0.95"
|
||||
bincode = "1.3.3"
|
||||
csv = "1.3.0"
|
||||
derive_builder = "0.20.0"
|
||||
bumpalo = "3.16.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.3.1"
|
||||
derive_builder = "0.20.2"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "2.1.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.30"
|
||||
flate2 = "1.0.35"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.5"
|
||||
page_size = "0.6.0"
|
||||
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
|
||||
rayon = "1.10.0"
|
||||
roaring = { version = "0.10.6", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.138", features = ["preserve_order"] }
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
memmap2 = "0.9.4"
|
||||
time = { version = "0.3.36", features = [
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tracing = "0.1.40"
|
||||
ureq = "2.10.0"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
bumpalo = "3.16.0"
|
||||
tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
arroy = "0.5.0"
|
||||
big_s = "1.0.2"
|
||||
crossbeam = "0.8.4"
|
||||
insta = { version = "1.39.0", features = ["json", "redactions"] }
|
||||
crossbeam-channel = "0.5.14"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["json", "redactions"] }
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
304
crates/index-scheduler/src/dump.rs
Normal file
304
crates/index-scheduler/src/dump.rs
Normal file
@ -0,0 +1,304 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
|
||||
use dump::{KindDump, TaskDump, UpdateFile};
|
||||
use meilisearch_types::batches::{Batch, BatchId};
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{utils, Error, IndexScheduler, Result};
|
||||
|
||||
pub struct Dump<'a> {
|
||||
index_scheduler: &'a IndexScheduler,
|
||||
wtxn: RwTxn<'a>,
|
||||
|
||||
batch_to_task_mapping: HashMap<BatchId, RoaringBitmap>,
|
||||
|
||||
indexes: HashMap<String, RoaringBitmap>,
|
||||
statuses: HashMap<Status, RoaringBitmap>,
|
||||
kinds: HashMap<Kind, RoaringBitmap>,
|
||||
|
||||
batch_indexes: HashMap<String, RoaringBitmap>,
|
||||
batch_statuses: HashMap<Status, RoaringBitmap>,
|
||||
batch_kinds: HashMap<Kind, RoaringBitmap>,
|
||||
}
|
||||
|
||||
impl<'a> Dump<'a> {
|
||||
pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result<Self> {
|
||||
// While loading a dump no one should be able to access the scheduler thus I can block everything.
|
||||
let wtxn = index_scheduler.env.write_txn()?;
|
||||
|
||||
Ok(Dump {
|
||||
index_scheduler,
|
||||
wtxn,
|
||||
batch_to_task_mapping: HashMap::new(),
|
||||
indexes: HashMap::new(),
|
||||
statuses: HashMap::new(),
|
||||
kinds: HashMap::new(),
|
||||
batch_indexes: HashMap::new(),
|
||||
batch_statuses: HashMap::new(),
|
||||
batch_kinds: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Register a new batch coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_batch(&mut self, batch: Batch) -> Result<()> {
|
||||
self.index_scheduler.queue.batches.all_batches.put(&mut self.wtxn, &batch.uid, &batch)?;
|
||||
if let Some(enqueued_at) = batch.enqueued_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.enqueued_at,
|
||||
enqueued_at.earliest,
|
||||
batch.uid,
|
||||
)?;
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.enqueued_at,
|
||||
enqueued_at.oldest,
|
||||
batch.uid,
|
||||
)?;
|
||||
}
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.started_at,
|
||||
batch.started_at,
|
||||
batch.uid,
|
||||
)?;
|
||||
if let Some(finished_at) = batch.finished_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.finished_at,
|
||||
finished_at,
|
||||
batch.uid,
|
||||
)?;
|
||||
}
|
||||
|
||||
for index in batch.stats.index_uids.keys() {
|
||||
match self.batch_indexes.get_mut(index) {
|
||||
Some(bitmap) => {
|
||||
bitmap.insert(batch.uid);
|
||||
}
|
||||
None => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(batch.uid);
|
||||
self.batch_indexes.insert(index.to_string(), bitmap);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for status in batch.stats.status.keys() {
|
||||
self.batch_statuses.entry(*status).or_default().insert(batch.uid);
|
||||
}
|
||||
for kind in batch.stats.types.keys() {
|
||||
self.batch_kinds.entry(*kind).or_default().insert(batch.uid);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register a new task coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_task(
|
||||
&mut self,
|
||||
task: TaskDump,
|
||||
content_file: Option<Box<UpdateFile>>,
|
||||
) -> Result<Task> {
|
||||
let task_has_no_docs = matches!(task.kind, KindDump::DocumentImport { documents_count, .. } if documents_count == 0);
|
||||
|
||||
let content_uuid = match content_file {
|
||||
Some(content_file) if task.status == Status::Enqueued => {
|
||||
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
|
||||
let mut writer = io::BufWriter::new(file);
|
||||
for doc in content_file {
|
||||
let doc = doc?;
|
||||
serde_json::to_writer(&mut writer, &doc).map_err(|e| {
|
||||
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
|
||||
})?;
|
||||
}
|
||||
let file = writer.into_inner().map_err(|e| e.into_error())?;
|
||||
file.persist()?;
|
||||
|
||||
Some(uuid)
|
||||
}
|
||||
// If the task isn't `Enqueued` then just generate a recognisable `Uuid`
|
||||
// in case we try to open it later.
|
||||
_ if task.status != Status::Enqueued => Some(Uuid::nil()),
|
||||
None if task.status == Status::Enqueued && task_has_no_docs => {
|
||||
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
|
||||
file.persist()?;
|
||||
|
||||
Some(uuid)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let task = Task {
|
||||
uid: task.uid,
|
||||
batch_uid: task.batch_uid,
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
error: task.error,
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details,
|
||||
status: task.status,
|
||||
kind: match task.kind {
|
||||
KindDump::DocumentImport {
|
||||
primary_key,
|
||||
method,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
} => KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
method,
|
||||
content_file: content_uuid.ok_or(Error::CorruptedDump)?,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
},
|
||||
KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
|
||||
documents_ids,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::DocumentDeletionByFilter { filter } => {
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
filter_expr: filter,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
}
|
||||
}
|
||||
KindDump::DocumentEdition { filter, context, function } => {
|
||||
KindWithContent::DocumentEdition {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
filter_expr: filter,
|
||||
context,
|
||||
function,
|
||||
}
|
||||
}
|
||||
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::Settings { settings, is_deletion, allow_index_creation } => {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
new_settings: settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
KindDump::IndexDeletion => KindWithContent::IndexDeletion {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
|
||||
KindDump::TaskCancelation { query, tasks } => {
|
||||
KindWithContent::TaskCancelation { query, tasks }
|
||||
}
|
||||
KindDump::TasksDeletion { query, tasks } => {
|
||||
KindWithContent::TaskDeletion { query, tasks }
|
||||
}
|
||||
KindDump::DumpCreation { keys, instance_uid } => {
|
||||
KindWithContent::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
|
||||
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
|
||||
},
|
||||
};
|
||||
|
||||
self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
|
||||
if let Some(batch_id) = task.batch_uid {
|
||||
self.batch_to_task_mapping.entry(batch_id).or_default().insert(task.uid);
|
||||
}
|
||||
|
||||
for index in task.indexes() {
|
||||
match self.indexes.get_mut(index) {
|
||||
Some(bitmap) => {
|
||||
bitmap.insert(task.uid);
|
||||
}
|
||||
None => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(task.uid);
|
||||
self.indexes.insert(index.to_string(), bitmap);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.enqueued_at,
|
||||
task.enqueued_at,
|
||||
task.uid,
|
||||
)?;
|
||||
|
||||
// we can't override the started_at & finished_at, so we must only set it if the tasks is finished and won't change
|
||||
if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
|
||||
if let Some(started_at) = task.started_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.started_at,
|
||||
started_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.finished_at,
|
||||
finished_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.statuses.entry(task.status).or_default().insert(task.uid);
|
||||
self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Commit all the changes and exit the importing dump state
|
||||
pub fn finish(mut self) -> Result<()> {
|
||||
for (batch_id, task_ids) in self.batch_to_task_mapping {
|
||||
self.index_scheduler.queue.batch_to_tasks_mapping.put(
|
||||
&mut self.wtxn,
|
||||
&batch_id,
|
||||
&task_ids,
|
||||
)?;
|
||||
}
|
||||
|
||||
for (index, bitmap) in self.indexes {
|
||||
self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
|
||||
}
|
||||
for (status, bitmap) in self.statuses {
|
||||
self.index_scheduler.queue.tasks.put_status(&mut self.wtxn, status, &bitmap)?;
|
||||
}
|
||||
for (kind, bitmap) in self.kinds {
|
||||
self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?;
|
||||
}
|
||||
|
||||
for (index, bitmap) in self.batch_indexes {
|
||||
self.index_scheduler.queue.batches.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
|
||||
}
|
||||
for (status, bitmap) in self.batch_statuses {
|
||||
self.index_scheduler.queue.batches.put_status(&mut self.wtxn, status, &bitmap)?;
|
||||
}
|
||||
for (kind, bitmap) in self.batch_kinds {
|
||||
self.index_scheduler.queue.batches.put_kind(&mut self.wtxn, kind, &bitmap)?;
|
||||
}
|
||||
|
||||
self.wtxn.commit()?;
|
||||
self.index_scheduler.scheduler.wake_up.signal();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -104,11 +104,13 @@ pub enum Error {
|
||||
)]
|
||||
InvalidTaskCanceledBy { canceled_by: String },
|
||||
#[error(
|
||||
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
|
||||
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 400 bytes."
|
||||
)]
|
||||
InvalidIndexUid { index_uid: String },
|
||||
#[error("Task `{0}` not found.")]
|
||||
TaskNotFound(TaskId),
|
||||
#[error("Task `{0}` does not contain any documents. Only `documentAdditionOrUpdate` tasks with the statuses `enqueued` or `processing` contain documents")]
|
||||
TaskFileNotFound(TaskId),
|
||||
#[error("Batch `{0}` not found.")]
|
||||
BatchNotFound(BatchId),
|
||||
#[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
|
||||
@ -122,10 +124,13 @@ pub enum Error {
|
||||
Dump(#[from] dump::Error),
|
||||
#[error(transparent)]
|
||||
Heed(#[from] heed::Error),
|
||||
#[error(transparent)]
|
||||
Milli(#[from] milli::Error),
|
||||
#[error("An unexpected crash occurred when processing the task.")]
|
||||
ProcessBatchPanicked,
|
||||
#[error("{}", match .index_uid {
|
||||
Some(uid) if !uid.is_empty() => format!("Index `{}`: {error}", uid),
|
||||
_ => format!("{error}")
|
||||
})]
|
||||
Milli { error: milli::Error, index_uid: Option<String> },
|
||||
#[error("An unexpected crash occurred when processing the task: {0}")]
|
||||
ProcessBatchPanicked(String),
|
||||
#[error(transparent)]
|
||||
FileStore(#[from] file_store::Error),
|
||||
#[error(transparent)]
|
||||
@ -144,7 +149,9 @@ pub enum Error {
|
||||
#[error("Corrupted task queue.")]
|
||||
CorruptedTaskQueue,
|
||||
#[error(transparent)]
|
||||
TaskDatabaseUpdate(Box<Self>),
|
||||
DatabaseUpgrade(Box<Self>),
|
||||
#[error(transparent)]
|
||||
UnrecoverableError(Box<Self>),
|
||||
#[error(transparent)]
|
||||
HeedTransaction(heed::Error),
|
||||
|
||||
@ -184,14 +191,15 @@ impl Error {
|
||||
| Error::InvalidTaskCanceledBy { .. }
|
||||
| Error::InvalidIndexUid { .. }
|
||||
| Error::TaskNotFound(_)
|
||||
| Error::TaskFileNotFound(_)
|
||||
| Error::BatchNotFound(_)
|
||||
| Error::TaskDeletionWithEmptyQuery
|
||||
| Error::TaskCancelationWithEmptyQuery
|
||||
| Error::AbortedTask
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
| Error::Milli(_)
|
||||
| Error::ProcessBatchPanicked
|
||||
| Error::Milli { .. }
|
||||
| Error::ProcessBatchPanicked(_)
|
||||
| Error::FileStore(_)
|
||||
| Error::IoError(_)
|
||||
| Error::Persist(_)
|
||||
@ -199,7 +207,8 @@ impl Error {
|
||||
| Error::Anyhow(_) => true,
|
||||
Error::CreateBatch(_)
|
||||
| Error::CorruptedTaskQueue
|
||||
| Error::TaskDatabaseUpdate(_)
|
||||
| Error::DatabaseUpgrade(_)
|
||||
| Error::UnrecoverableError(_)
|
||||
| Error::HeedTransaction(_) => false,
|
||||
#[cfg(test)]
|
||||
Error::PlannedFailure => false,
|
||||
@ -209,6 +218,20 @@ impl Error {
|
||||
pub fn with_custom_error_code(self, code: Code) -> Self {
|
||||
Self::WithCustomErrorCode(code, Box::new(self))
|
||||
}
|
||||
|
||||
pub fn from_milli(err: milli::Error, index_uid: Option<String>) -> Self {
|
||||
match err {
|
||||
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
|
||||
Self::Milli { error: err, index_uid }
|
||||
.with_custom_error_code(Code::InvalidDocumentFilter)
|
||||
}
|
||||
milli::Error::UserError(milli::UserError::InvalidFilterExpression { .. }) => {
|
||||
Self::Milli { error: err, index_uid }
|
||||
.with_custom_error_code(Code::InvalidDocumentFilter)
|
||||
}
|
||||
_ => Self::Milli { error: err, index_uid },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for Error {
|
||||
@ -230,14 +253,15 @@ impl ErrorCode for Error {
|
||||
Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy,
|
||||
Error::InvalidIndexUid { .. } => Code::InvalidIndexUid,
|
||||
Error::TaskNotFound(_) => Code::TaskNotFound,
|
||||
Error::TaskFileNotFound(_) => Code::TaskFileNotFound,
|
||||
Error::BatchNotFound(_) => Code::BatchNotFound,
|
||||
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
|
||||
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
|
||||
// TODO: not sure of the Code to use
|
||||
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli(e) => e.error_code(),
|
||||
Error::ProcessBatchPanicked => Code::Internal,
|
||||
Error::Milli { error, .. } => error.error_code(),
|
||||
Error::ProcessBatchPanicked(_) => Code::Internal,
|
||||
Error::Heed(e) => e.error_code(),
|
||||
Error::HeedTransaction(e) => e.error_code(),
|
||||
Error::FileStore(e) => e.error_code(),
|
||||
@ -249,7 +273,8 @@ impl ErrorCode for Error {
|
||||
Error::Anyhow(_) => Code::Internal,
|
||||
Error::CorruptedTaskQueue => Code::Internal,
|
||||
Error::CorruptedDump => Code::Internal,
|
||||
Error::TaskDatabaseUpdate(_) => Code::Internal,
|
||||
Error::DatabaseUpgrade(_) => Code::Internal,
|
||||
Error::UnrecoverableError(_) => Code::Internal,
|
||||
Error::CreateBatch(_) => Code::Internal,
|
||||
|
||||
// This one should never be seen by the end user
|
||||
|
@ -1,18 +1,29 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
|
||||
|
||||
use crate::error::FeatureNotEnabledError;
|
||||
use crate::Result;
|
||||
|
||||
const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
/// The number of database used by features
|
||||
const NUMBER_OF_DATABASES: u32 = 1;
|
||||
/// Database const names for the `FeatureData`.
|
||||
mod db_name {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
}
|
||||
|
||||
mod db_keys {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
pub const NETWORK: &str = "network";
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct FeatureData {
|
||||
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
|
||||
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
|
||||
network: Arc<RwLock<Network>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@ -56,19 +67,6 @@ impl RoFeatures {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.vector_store {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "vector store",
|
||||
issue_link: "https://github.com/meilisearch/product/discussions/677",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_edit_documents_by_function(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.edit_documents_by_function {
|
||||
Ok(())
|
||||
@ -94,17 +92,62 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_network(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.network {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "network",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/805",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_get_task_documents_route(&self) -> Result<()> {
|
||||
if self.runtime.get_task_documents_route {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Getting the documents of an enqueued task",
|
||||
feature: "get task documents route",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/808",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.composite_embedders {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "composite embedders",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/816",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
|
||||
wtxn.commit()?;
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
env: &Env<WithoutTls>,
|
||||
wtxn: &mut RwTxn,
|
||||
instance_features: InstanceTogglableFeatures,
|
||||
) -> Result<Self> {
|
||||
let runtime_features_db =
|
||||
env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;
|
||||
|
||||
let txn = env.read_txn()?;
|
||||
let persisted_features: RuntimeTogglableFeatures =
|
||||
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
|
||||
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
|
||||
metrics: metrics || persisted_features.metrics,
|
||||
@ -113,7 +156,14 @@ impl FeatureData {
|
||||
..persisted_features
|
||||
}));
|
||||
|
||||
Ok(Self { persisted: runtime_features_db, runtime })
|
||||
let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>();
|
||||
let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();
|
||||
|
||||
Ok(Self {
|
||||
persisted: runtime_features_db,
|
||||
runtime,
|
||||
network: Arc::new(RwLock::new(network)),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn put_runtime_features(
|
||||
@ -121,7 +171,7 @@ impl FeatureData {
|
||||
mut wtxn: RwTxn,
|
||||
features: RuntimeTogglableFeatures,
|
||||
) -> Result<()> {
|
||||
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
|
||||
self.persisted.put(&mut wtxn, db_keys::EXPERIMENTAL_FEATURES, &features)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
// safe to unwrap, the lock will only fail if:
|
||||
@ -142,4 +192,21 @@ impl FeatureData {
|
||||
pub fn features(&self) -> RoFeatures {
|
||||
RoFeatures::new(self)
|
||||
}
|
||||
|
||||
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
|
||||
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
|
||||
&mut wtxn,
|
||||
db_keys::NETWORK,
|
||||
&new_network,
|
||||
)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
let mut network = self.network.write().unwrap();
|
||||
*network = new_network;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Network {
|
||||
Network::clone(&*self.network.read().unwrap())
|
||||
}
|
||||
}
|
||||
|
@ -1,16 +1,17 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::env::VarError;
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
|
||||
use meilisearch_types::milli::Index;
|
||||
use meilisearch_types::milli::{Index, Result};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing};
|
||||
use crate::clamp_to_page_size;
|
||||
use crate::lru::{InsertionOutcome, LruMap};
|
||||
use crate::{clamp_to_page_size, Result};
|
||||
|
||||
/// Keep an internally consistent view of the open indexes in memory.
|
||||
///
|
||||
/// This view is made of an LRU cache that will evict the least frequently used indexes when new indexes are opened.
|
||||
@ -103,7 +104,7 @@ impl ReopenableIndex {
|
||||
return Ok(());
|
||||
}
|
||||
map.unavailable.remove(&self.uuid);
|
||||
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
|
||||
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size, false)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@ -172,11 +173,12 @@ impl IndexMap {
|
||||
date: Option<(OffsetDateTime, OffsetDateTime)>,
|
||||
enable_mdb_writemap: bool,
|
||||
map_size: usize,
|
||||
creation: bool,
|
||||
) -> Result<Index> {
|
||||
if !matches!(self.get_unavailable(uuid), Missing) {
|
||||
panic!("Attempt to open an index that was unavailable");
|
||||
}
|
||||
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
|
||||
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size, creation)?;
|
||||
match self.available.insert(*uuid, index.clone()) {
|
||||
InsertionOutcome::InsertedNew => (),
|
||||
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
|
||||
@ -300,18 +302,31 @@ fn create_or_open_index(
|
||||
date: Option<(OffsetDateTime, OffsetDateTime)>,
|
||||
enable_mdb_writemap: bool,
|
||||
map_size: usize,
|
||||
creation: bool,
|
||||
) -> Result<Index> {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
options.map_size(clamp_to_page_size(map_size));
|
||||
options.max_readers(1024);
|
||||
|
||||
// You can find more details about this experimental
|
||||
// environment variable on the following GitHub discussion:
|
||||
// <https://github.com/orgs/meilisearch/discussions/806>
|
||||
let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") {
|
||||
Ok(value) => u32::from_str(&value).unwrap(),
|
||||
Err(VarError::NotPresent) => 1024,
|
||||
Err(VarError::NotUnicode(value)) => panic!(
|
||||
"Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}"
|
||||
),
|
||||
};
|
||||
options.max_readers(max_readers);
|
||||
if enable_mdb_writemap {
|
||||
unsafe { options.flags(EnvFlags::WRITE_MAP) };
|
||||
}
|
||||
|
||||
if let Some((created, updated)) = date {
|
||||
Ok(Index::new_with_creation_dates(options, path, created, updated)?)
|
||||
Ok(Index::new_with_creation_dates(options, path, created, updated, creation)?)
|
||||
} else {
|
||||
Ok(Index::new(options, path)?)
|
||||
Ok(Index::new(options, path, creation)?)
|
||||
}
|
||||
}
|
||||
|
||||
@ -319,17 +334,17 @@ fn create_or_open_index(
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use meilisearch_types::heed::Env;
|
||||
use meilisearch_types::heed::{Env, WithoutTls};
|
||||
use meilisearch_types::Index;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::super::IndexMapper;
|
||||
use crate::tests::IndexSchedulerHandle;
|
||||
use crate::test_utils::IndexSchedulerHandle;
|
||||
use crate::utils::clamp_to_page_size;
|
||||
use crate::IndexScheduler;
|
||||
|
||||
impl IndexMapper {
|
||||
fn test() -> (Self, Env, IndexSchedulerHandle) {
|
||||
fn test() -> (Self, Env<WithoutTls>, IndexSchedulerHandle) {
|
||||
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
|
||||
(index_scheduler.index_mapper, index_scheduler.env, handle)
|
||||
}
|
||||
|
@ -4,7 +4,9 @@ use std::time::Duration;
|
||||
use std::{fs, thread};
|
||||
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::database_stats::DatabaseStats;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{FieldDistribution, Index};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -15,12 +17,17 @@ use uuid::Uuid;
|
||||
use self::index_map::IndexMap;
|
||||
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
|
||||
use crate::uuid_codec::UuidCodec;
|
||||
use crate::{Error, Result};
|
||||
use crate::{Error, IndexBudget, IndexSchedulerOptions, Result};
|
||||
|
||||
mod index_map;
|
||||
|
||||
const INDEX_MAPPING: &str = "index-mapping";
|
||||
const INDEX_STATS: &str = "index-stats";
|
||||
/// The number of database used by index mapper
|
||||
const NUMBER_OF_DATABASES: u32 = 2;
|
||||
/// Database const names for the `IndexMapper`.
|
||||
mod db_name {
|
||||
pub const INDEX_MAPPING: &str = "index-mapping";
|
||||
pub const INDEX_STATS: &str = "index-stats";
|
||||
}
|
||||
|
||||
/// Structure managing meilisearch's indexes.
|
||||
///
|
||||
@ -92,19 +99,32 @@ pub enum IndexStatus {
|
||||
/// The statistics that can be computed from an `Index` object.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IndexStats {
|
||||
/// Number of documents in the index.
|
||||
pub number_of_documents: u64,
|
||||
/// Stats of the documents database.
|
||||
#[serde(default)]
|
||||
pub documents_database_stats: DatabaseStats,
|
||||
|
||||
#[serde(default, skip_serializing)]
|
||||
pub number_of_documents: Option<u64>,
|
||||
|
||||
/// Size taken up by the index' DB, in bytes.
|
||||
///
|
||||
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
|
||||
/// are not returned to the disk after a deletion, this number is typically larger than
|
||||
/// `used_database_size` that only includes the size of the used pages.
|
||||
pub database_size: u64,
|
||||
/// Number of embeddings in the index.
|
||||
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
|
||||
pub number_of_embeddings: Option<u64>,
|
||||
/// Number of embedded documents in the index.
|
||||
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
|
||||
pub number_of_embedded_documents: Option<u64>,
|
||||
/// Size taken by the used pages of the index' DB, in bytes.
|
||||
///
|
||||
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
|
||||
/// this value is typically smaller than `database_size`.
|
||||
pub used_database_size: u64,
|
||||
/// The primary key of the index
|
||||
pub primary_key: Option<String>,
|
||||
/// Association of every field name with the number of times it occurs in the documents.
|
||||
pub field_distribution: FieldDistribution,
|
||||
/// Creation date of the index.
|
||||
@ -121,11 +141,16 @@ impl IndexStats {
|
||||
/// # Parameters
|
||||
///
|
||||
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
|
||||
pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
|
||||
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
|
||||
let arroy_stats = index.arroy_stats(rtxn)?;
|
||||
Ok(IndexStats {
|
||||
number_of_documents: index.number_of_documents(rtxn)?,
|
||||
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
|
||||
number_of_embedded_documents: Some(arroy_stats.documents.len()),
|
||||
documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(),
|
||||
number_of_documents: None,
|
||||
database_size: index.on_disk_size()?,
|
||||
used_database_size: index.used_size()?,
|
||||
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
|
||||
field_distribution: index.field_distribution(rtxn)?,
|
||||
created_at: index.created_at(rtxn)?,
|
||||
updated_at: index.updated_at(rtxn)?,
|
||||
@ -134,29 +159,25 @@ impl IndexStats {
|
||||
}
|
||||
|
||||
impl IndexMapper {
|
||||
pub fn new(
|
||||
env: &Env,
|
||||
base_path: PathBuf,
|
||||
index_base_map_size: usize,
|
||||
index_growth_amount: usize,
|
||||
index_count: usize,
|
||||
enable_mdb_writemap: bool,
|
||||
indexer_config: IndexerConfig,
|
||||
) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
|
||||
let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?;
|
||||
wtxn.commit()?;
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
env: &Env<WithoutTls>,
|
||||
wtxn: &mut RwTxn,
|
||||
options: &IndexSchedulerOptions,
|
||||
budget: IndexBudget,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
index_map: Arc::new(RwLock::new(IndexMap::new(index_count))),
|
||||
index_mapping,
|
||||
index_stats,
|
||||
base_path,
|
||||
index_base_map_size,
|
||||
index_growth_amount,
|
||||
enable_mdb_writemap,
|
||||
indexer_config: Arc::new(indexer_config),
|
||||
index_map: Arc::new(RwLock::new(IndexMap::new(budget.index_count))),
|
||||
index_mapping: env.create_database(wtxn, Some(db_name::INDEX_MAPPING))?,
|
||||
index_stats: env.create_database(wtxn, Some(db_name::INDEX_STATS))?,
|
||||
base_path: options.indexes_path.clone(),
|
||||
index_base_map_size: budget.map_size,
|
||||
index_growth_amount: options.index_growth_amount,
|
||||
enable_mdb_writemap: options.enable_mdb_writemap,
|
||||
indexer_config: options.indexer_config.clone(),
|
||||
currently_updating_index: Default::default(),
|
||||
})
|
||||
}
|
||||
@ -183,13 +204,24 @@ impl IndexMapper {
|
||||
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
|
||||
// This is very unlikely to happen in practice.
|
||||
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
|
||||
let index = self.index_map.write().unwrap().create(
|
||||
let index = self
|
||||
.index_map
|
||||
.write()
|
||||
.unwrap()
|
||||
.create(
|
||||
&uuid,
|
||||
&index_path,
|
||||
date,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
)?;
|
||||
true,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
|
||||
self.store_stats_of(&mut wtxn, name, &stats)?;
|
||||
drop(index_rtxn);
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
@ -357,7 +389,9 @@ impl IndexMapper {
|
||||
};
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
// take the lock to reopen the environment.
|
||||
reopen.reopen(&mut self.index_map.write().unwrap(), &index_path)?;
|
||||
reopen
|
||||
.reopen(&mut self.index_map.write().unwrap(), &index_path)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
continue;
|
||||
}
|
||||
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
|
||||
@ -372,13 +406,16 @@ impl IndexMapper {
|
||||
Missing => {
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
|
||||
break index_map.create(
|
||||
break index_map
|
||||
.create(
|
||||
&uuid,
|
||||
&index_path,
|
||||
None,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
)?;
|
||||
false,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
}
|
||||
Available(index) => break index,
|
||||
Closing(_) => {
|
||||
@ -460,6 +497,7 @@ impl IndexMapper {
|
||||
let index = self.index(rtxn, index_uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,15 +1,16 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::Write;
|
||||
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
|
||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Details, Task};
|
||||
use meilisearch_types::tasks::{Details, Kind, Status, Task};
|
||||
use meilisearch_types::versioning;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::{IndexScheduler, Kind, Status, BEI128};
|
||||
use crate::{IndexScheduler, BEI128};
|
||||
|
||||
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
// Since we'll snapshot the index right afterward, we don't need to ensure it's internally consistent for every run.
|
||||
@ -18,41 +19,15 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
scheduler.assert_internally_consistent();
|
||||
|
||||
let IndexScheduler {
|
||||
autobatching_enabled,
|
||||
cleanup_enabled: _,
|
||||
must_stop_processing: _,
|
||||
processing_tasks,
|
||||
file_store,
|
||||
env,
|
||||
all_tasks,
|
||||
all_batches,
|
||||
batch_to_tasks_mapping,
|
||||
// task reverse index
|
||||
status,
|
||||
kind,
|
||||
index_tasks,
|
||||
canceled_by,
|
||||
enqueued_at,
|
||||
started_at,
|
||||
finished_at,
|
||||
|
||||
// batch reverse index
|
||||
batch_status,
|
||||
batch_kind,
|
||||
batch_index_tasks,
|
||||
batch_enqueued_at,
|
||||
batch_started_at,
|
||||
batch_finished_at,
|
||||
version,
|
||||
queue,
|
||||
scheduler,
|
||||
|
||||
index_mapper,
|
||||
features: _,
|
||||
max_number_of_tasks: _,
|
||||
max_number_of_batched_tasks: _,
|
||||
wake_up: _,
|
||||
dumps_path: _,
|
||||
snapshots_path: _,
|
||||
auth_path: _,
|
||||
version_file_path: _,
|
||||
webhook_url: _,
|
||||
webhook_authorization_header: _,
|
||||
test_breakpoint_sdr: _,
|
||||
@ -65,8 +40,18 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
|
||||
let mut snap = String::new();
|
||||
|
||||
let indx_sched_version = version.get_version(&rtxn).unwrap();
|
||||
let latest_version = (
|
||||
versioning::VERSION_MAJOR.parse().unwrap(),
|
||||
versioning::VERSION_MINOR.parse().unwrap(),
|
||||
versioning::VERSION_PATCH.parse().unwrap(),
|
||||
);
|
||||
if indx_sched_version != Some(latest_version) {
|
||||
snap.push_str(&format!("index scheduler running on version {indx_sched_version:?}\n"));
|
||||
}
|
||||
|
||||
let processing = processing_tasks.read().unwrap().clone();
|
||||
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
|
||||
snap.push_str(&format!("### Autobatching Enabled = {}\n", scheduler.autobatching_enabled));
|
||||
snap.push_str(&format!(
|
||||
"### Processing batch {:?}:\n",
|
||||
processing.batch.as_ref().map(|batch| batch.uid)
|
||||
@ -79,19 +64,19 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### All Tasks:\n");
|
||||
snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks));
|
||||
snap.push_str(&snapshot_all_tasks(&rtxn, queue.tasks.all_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Status:\n");
|
||||
snap.push_str(&snapshot_status(&rtxn, *status));
|
||||
snap.push_str(&snapshot_status(&rtxn, queue.tasks.status));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Kind:\n");
|
||||
snap.push_str(&snapshot_kind(&rtxn, *kind));
|
||||
snap.push_str(&snapshot_kind(&rtxn, queue.tasks.kind));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Index Tasks:\n");
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks));
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, queue.tasks.index_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Index Mapper:\n");
|
||||
@ -99,55 +84,55 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Canceled By:\n");
|
||||
snap.push_str(&snapshot_canceled_by(&rtxn, *canceled_by));
|
||||
snap.push_str(&snapshot_canceled_by(&rtxn, queue.tasks.canceled_by));
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Enqueued At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.enqueued_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Started At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *started_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.started_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Finished At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *finished_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.finished_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### All Batches:\n");
|
||||
snap.push_str(&snapshot_all_batches(&rtxn, *all_batches));
|
||||
snap.push_str(&snapshot_all_batches(&rtxn, queue.batches.all_batches));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batch to tasks mapping:\n");
|
||||
snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, *batch_to_tasks_mapping));
|
||||
snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, queue.batch_to_tasks_mapping));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Status:\n");
|
||||
snap.push_str(&snapshot_status(&rtxn, *batch_status));
|
||||
snap.push_str(&snapshot_status(&rtxn, queue.batches.status));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Kind:\n");
|
||||
snap.push_str(&snapshot_kind(&rtxn, *batch_kind));
|
||||
snap.push_str(&snapshot_kind(&rtxn, queue.batches.kind));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Index Tasks:\n");
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, *batch_index_tasks));
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, queue.batches.index_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Enqueued At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_enqueued_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.enqueued_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Started At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_started_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.started_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Finished At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_finished_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.finished_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### File Store:\n");
|
||||
snap.push_str(&snapshot_file_store(file_store));
|
||||
snap.push_str(&snapshot_file_store(&queue.file_store));
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap
|
||||
@ -306,6 +291,9 @@ fn snapshot_details(d: &Details) -> String {
|
||||
Details::IndexSwap { swaps } => {
|
||||
format!("{{ swaps: {swaps:?} }}")
|
||||
}
|
||||
Details::UpgradeDatabase { from, to } => {
|
||||
format!("{{ from: {from:?}, to: {to:?} }}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -353,14 +341,23 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
|
||||
|
||||
pub fn snapshot_batch(batch: &Batch) -> String {
|
||||
let mut snap = String::new();
|
||||
let Batch { uid, details, stats, started_at, finished_at } = batch;
|
||||
let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
|
||||
let stats = BatchStats {
|
||||
progress_trace: Default::default(),
|
||||
write_channel_congestion: None,
|
||||
..stats.clone()
|
||||
};
|
||||
if let Some(finished_at) = finished_at {
|
||||
assert!(finished_at > started_at);
|
||||
}
|
||||
let BatchEnqueuedAt { earliest, oldest } = enqueued_at.unwrap();
|
||||
assert!(*started_at > earliest);
|
||||
assert!(earliest >= oldest);
|
||||
|
||||
snap.push('{');
|
||||
snap.push_str(&format!("uid: {uid}, "));
|
||||
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
|
||||
snap.push_str(&format!("stats: {}, ", serde_json::to_string(stats).unwrap()));
|
||||
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
|
||||
snap.push('}');
|
||||
snap
|
||||
}
|
||||
@ -373,7 +370,8 @@ pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
|
||||
let stats = mapper.stats_of(rtxn, &name).unwrap();
|
||||
s.push_str(&format!(
|
||||
"{name}: {{ number_of_documents: {}, field_distribution: {:?} }}\n",
|
||||
stats.number_of_documents, stats.field_distribution
|
||||
stats.documents_database_stats.number_of_entries(),
|
||||
stats.field_distribution
|
||||
));
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
289
crates/index-scheduler/src/processing.rs
Normal file
289
crates/index-scheduler/src/processing.rs
Normal file
@ -0,0 +1,289 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView};
|
||||
use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::utils::ProcessingBatch;
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub struct ProcessingTasks {
|
||||
pub batch: Option<Arc<ProcessingBatch>>,
|
||||
/// The list of tasks ids that are currently running.
|
||||
pub processing: Arc<RoaringBitmap>,
|
||||
/// The progress on processing tasks
|
||||
pub progress: Option<Progress>,
|
||||
}
|
||||
|
||||
impl ProcessingTasks {
|
||||
/// Creates an empty `ProcessingAt` struct.
|
||||
pub fn new() -> ProcessingTasks {
|
||||
ProcessingTasks::default()
|
||||
}
|
||||
|
||||
pub fn get_progress_view(&self) -> Option<ProgressView> {
|
||||
Some(self.progress.as_ref()?.as_progress_view())
|
||||
}
|
||||
|
||||
/// Stores the currently processing tasks, and the date time at which it started.
|
||||
pub fn start_processing(
|
||||
&mut self,
|
||||
processing_batch: ProcessingBatch,
|
||||
processing: RoaringBitmap,
|
||||
) -> Progress {
|
||||
self.batch = Some(Arc::new(processing_batch));
|
||||
self.processing = Arc::new(processing);
|
||||
let progress = Progress::default();
|
||||
progress.update_progress(BatchProgress::ProcessingTasks);
|
||||
self.progress = Some(progress.clone());
|
||||
|
||||
progress
|
||||
}
|
||||
|
||||
/// Set the processing tasks to an empty list
|
||||
pub fn stop_processing(&mut self) -> Self {
|
||||
self.progress = None;
|
||||
|
||||
Self {
|
||||
batch: std::mem::take(&mut self.batch),
|
||||
processing: std::mem::take(&mut self.processing),
|
||||
progress: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
|
||||
pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
|
||||
!self.processing.is_disjoint(canceled_tasks)
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum BatchProgress {
|
||||
ProcessingTasks,
|
||||
WritingTasksToDisk,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum TaskCancelationProgress {
|
||||
RetrievingTasks,
|
||||
UpdatingTasks,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum TaskDeletionProgress {
|
||||
DeletingTasksDateTime,
|
||||
DeletingTasksMetadata,
|
||||
DeletingTasks,
|
||||
DeletingBatches,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum SnapshotCreationProgress {
|
||||
StartTheSnapshotCreation,
|
||||
SnapshotTheIndexScheduler,
|
||||
SnapshotTheUpdateFiles,
|
||||
SnapshotTheIndexes,
|
||||
SnapshotTheApiKeys,
|
||||
CreateTheTarball,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum DumpCreationProgress {
|
||||
StartTheDumpCreation,
|
||||
DumpTheApiKeys,
|
||||
DumpTheTasks,
|
||||
DumpTheBatches,
|
||||
DumpTheIndexes,
|
||||
DumpTheExperimentalFeatures,
|
||||
CompressTheDump,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum CreateIndexProgress {
|
||||
CreatingTheIndex,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum UpdateIndexProgress {
|
||||
UpdatingTheIndex,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum DeleteIndexProgress {
|
||||
DeletingTheIndex,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum SwappingTheIndexes {
|
||||
EnsuringCorrectnessOfTheSwap,
|
||||
SwappingTheIndexes,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum InnerSwappingTwoIndexes {
|
||||
RetrieveTheTasks,
|
||||
UpdateTheTasks,
|
||||
UpdateTheIndexesMetadata,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum DocumentOperationProgress {
|
||||
RetrievingConfig,
|
||||
ComputingDocumentChanges,
|
||||
Indexing,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum DocumentEditionProgress {
|
||||
RetrievingConfig,
|
||||
ComputingDocumentChanges,
|
||||
Indexing,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum DocumentDeletionProgress {
|
||||
RetrievingConfig,
|
||||
DeleteDocuments,
|
||||
Indexing,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum SettingsProgress {
|
||||
RetrievingAndMergingTheSettings,
|
||||
ApplyTheSettings,
|
||||
}
|
||||
}
|
||||
|
||||
make_atomic_progress!(Task alias AtomicTaskStep => "task" );
|
||||
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
|
||||
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
|
||||
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use meili_snap::{json_string, snapshot};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn one_level() {
|
||||
let mut processing = ProcessingTasks::new();
|
||||
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "processing tasks",
|
||||
"finished": 0,
|
||||
"total": 2
|
||||
}
|
||||
],
|
||||
"percentage": 0.0
|
||||
}
|
||||
"#);
|
||||
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "writing tasks to disk",
|
||||
"finished": 1,
|
||||
"total": 2
|
||||
}
|
||||
],
|
||||
"percentage": 50.0
|
||||
}
|
||||
"#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn task_progress() {
|
||||
let mut processing = ProcessingTasks::new();
|
||||
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
|
||||
let (atomic, tasks) = AtomicTaskStep::new(10);
|
||||
processing.progress.as_ref().unwrap().update_progress(tasks);
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "processing tasks",
|
||||
"finished": 0,
|
||||
"total": 2
|
||||
},
|
||||
{
|
||||
"currentStep": "task",
|
||||
"finished": 0,
|
||||
"total": 10
|
||||
}
|
||||
],
|
||||
"percentage": 0.0
|
||||
}
|
||||
"#);
|
||||
atomic.fetch_add(6, Ordering::Relaxed);
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "processing tasks",
|
||||
"finished": 0,
|
||||
"total": 2
|
||||
},
|
||||
{
|
||||
"currentStep": "task",
|
||||
"finished": 6,
|
||||
"total": 10
|
||||
}
|
||||
],
|
||||
"percentage": 30.000002
|
||||
}
|
||||
"#);
|
||||
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "writing tasks to disk",
|
||||
"finished": 1,
|
||||
"total": 2
|
||||
}
|
||||
],
|
||||
"percentage": 50.0
|
||||
}
|
||||
"#);
|
||||
let (atomic, tasks) = AtomicTaskStep::new(5);
|
||||
processing.progress.as_ref().unwrap().update_progress(tasks);
|
||||
atomic.fetch_add(4, Ordering::Relaxed);
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "writing tasks to disk",
|
||||
"finished": 1,
|
||||
"total": 2
|
||||
},
|
||||
{
|
||||
"currentStep": "task",
|
||||
"finished": 4,
|
||||
"total": 5
|
||||
}
|
||||
],
|
||||
"percentage": 90.0
|
||||
}
|
||||
"#);
|
||||
}
|
||||
}
|
603
crates/index-scheduler/src/queue/batches.rs
Normal file
603
crates/index-scheduler/src/queue/batches.rs
Normal file
@ -0,0 +1,603 @@
|
||||
use std::collections::HashSet;
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
|
||||
use meilisearch_types::batches::{Batch, BatchId};
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, Status};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{Query, Queue};
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
insert_task_datetime, keep_ids_within_datetimes, map_bound,
|
||||
remove_n_tasks_datetime_earlier_than, remove_task_datetime, ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, Result, BEI128};
|
||||
|
||||
/// The number of database used by the batch queue
|
||||
const NUMBER_OF_DATABASES: u32 = 7;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const ALL_BATCHES: &str = "all-batches";
|
||||
|
||||
pub const BATCH_STATUS: &str = "batch-status";
|
||||
pub const BATCH_KIND: &str = "batch-kind";
|
||||
pub const BATCH_INDEX_TASKS: &str = "batch-index-tasks";
|
||||
pub const BATCH_ENQUEUED_AT: &str = "batch-enqueued-at";
|
||||
pub const BATCH_STARTED_AT: &str = "batch-started-at";
|
||||
pub const BATCH_FINISHED_AT: &str = "batch-finished-at";
|
||||
}
|
||||
|
||||
pub struct BatchQueue {
|
||||
/// Contains all the batches accessible by their Id.
|
||||
pub(crate) all_batches: Database<BEU32, SerdeJson<Batch>>,
|
||||
|
||||
/// All the batches containing a task matching the selected status.
|
||||
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the batches ids grouped by the kind of their task.
|
||||
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the batches associated to an index.
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
/// Store the batches containing tasks which were enqueued at a specific date
|
||||
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the batches containing finished tasks started at a specific date
|
||||
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the batches containing tasks finished at a specific date
|
||||
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
}
|
||||
|
||||
impl BatchQueue {
|
||||
pub(crate) fn private_clone(&self) -> BatchQueue {
|
||||
BatchQueue {
|
||||
all_batches: self.all_batches,
|
||||
status: self.status,
|
||||
kind: self.kind,
|
||||
index_tasks: self.index_tasks,
|
||||
enqueued_at: self.enqueued_at,
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub(super) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
|
||||
Ok(Self {
|
||||
all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
|
||||
status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,
|
||||
kind: env.create_database(wtxn, Some(db_name::BATCH_KIND))?,
|
||||
index_tasks: env.create_database(wtxn, Some(db_name::BATCH_INDEX_TASKS))?,
|
||||
enqueued_at: env.create_database(wtxn, Some(db_name::BATCH_ENQUEUED_AT))?,
|
||||
started_at: env.create_database(wtxn, Some(db_name::BATCH_STARTED_AT))?,
|
||||
finished_at: env.create_database(wtxn, Some(db_name::BATCH_FINISHED_AT))?,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn all_batch_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
|
||||
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
|
||||
}
|
||||
|
||||
pub(crate) fn next_batch_id(&self, rtxn: &RoTxn) -> Result<BatchId> {
|
||||
Ok(self
|
||||
.all_batches
|
||||
.remap_data_type::<DecodeIgnore>()
|
||||
.last(rtxn)?
|
||||
.map(|(k, _)| k + 1)
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn get_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<Option<Batch>> {
|
||||
Ok(self.all_batches.get(rtxn, &batch_id)?)
|
||||
}
|
||||
|
||||
/// Returns the whole set of batches that belongs to this index.
|
||||
pub(crate) fn index_batches(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
|
||||
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn update_index(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
index: &str,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut batches = self.index_batches(wtxn, index)?;
|
||||
f(&mut batches);
|
||||
if batches.is_empty() {
|
||||
self.index_tasks.delete(wtxn, index)?;
|
||||
} else {
|
||||
self.index_tasks.put(wtxn, index, &batches)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
|
||||
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.status.put(wtxn, &status, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_status(wtxn, status)?;
|
||||
f(&mut tasks);
|
||||
self.put_status(wtxn, status, &tasks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
|
||||
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.kind.put(wtxn, &kind, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_kind(wtxn, kind)?;
|
||||
f(&mut tasks);
|
||||
self.put_kind(wtxn, kind, &tasks)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> {
|
||||
let old_batch = self.all_batches.get(wtxn, &batch.uid)?;
|
||||
|
||||
self.all_batches.put(
|
||||
wtxn,
|
||||
&batch.uid,
|
||||
&Batch {
|
||||
uid: batch.uid,
|
||||
progress: None,
|
||||
details: batch.details,
|
||||
stats: batch.stats,
|
||||
started_at: batch.started_at,
|
||||
finished_at: batch.finished_at,
|
||||
enqueued_at: batch.enqueued_at,
|
||||
},
|
||||
)?;
|
||||
|
||||
// Update the statuses
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
for status in old_batch.stats.status.keys() {
|
||||
self.update_status(wtxn, *status, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for status in batch.statuses {
|
||||
self.update_status(wtxn, status, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the kinds / types
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
let kinds: HashSet<_> = old_batch.stats.types.keys().cloned().collect();
|
||||
for kind in kinds.difference(&batch.kinds) {
|
||||
self.update_kind(wtxn, *kind, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for kind in batch.kinds {
|
||||
self.update_kind(wtxn, kind, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the indexes
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
let indexes: HashSet<_> = old_batch.stats.index_uids.keys().cloned().collect();
|
||||
for index in indexes.difference(&batch.indexes) {
|
||||
self.update_index(wtxn, index, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for index in batch.indexes {
|
||||
self.update_index(wtxn, &index, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the enqueued_at: we cannot retrieve the previous enqueued at from the previous batch, and
|
||||
// must instead go through the db looking for it. We cannot look at the task contained in this batch either
|
||||
// because they may have been removed.
|
||||
// What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written
|
||||
// to the DB per batches.
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
if let Some(enqueued_at) = old_batch.enqueued_at {
|
||||
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, old_batch.uid)?;
|
||||
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, old_batch.uid)?;
|
||||
} else {
|
||||
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
|
||||
// and we still need to find the date by scrolling the database
|
||||
remove_n_tasks_datetime_earlier_than(
|
||||
wtxn,
|
||||
self.enqueued_at,
|
||||
old_batch.started_at,
|
||||
old_batch.stats.total_nb_tasks.clamp(1, 2) as usize,
|
||||
old_batch.uid,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
// A finished batch MUST contains at least one task and have an enqueued_at
|
||||
let enqueued_at = batch.enqueued_at.as_ref().unwrap();
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, batch.uid)?;
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, batch.uid)?;
|
||||
|
||||
// Update the started at and finished at
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
remove_task_datetime(wtxn, self.started_at, old_batch.started_at, old_batch.uid)?;
|
||||
if let Some(finished_at) = old_batch.finished_at {
|
||||
remove_task_datetime(wtxn, self.finished_at, finished_at, old_batch.uid)?;
|
||||
}
|
||||
}
|
||||
insert_task_datetime(wtxn, self.started_at, batch.started_at, batch.uid)?;
|
||||
insert_task_datetime(wtxn, self.finished_at, batch.finished_at.unwrap(), batch.uid)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of batches. The batches MUST exist or a
|
||||
/// `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_batches(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
tasks: impl IntoIterator<Item = BatchId>,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<Vec<Batch>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|batch_id| {
|
||||
if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
let mut batch = processing.batch.as_ref().unwrap().to_batch();
|
||||
batch.progress = processing.get_progress_view();
|
||||
Ok(batch)
|
||||
} else {
|
||||
self.get_batch(rtxn, batch_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
|
||||
}
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
/// Return the batch ids matched by the given query from the index scheduler's point of view.
|
||||
pub(crate) fn get_batch_ids(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let Query {
|
||||
limit,
|
||||
from,
|
||||
reverse,
|
||||
uids,
|
||||
batch_uids,
|
||||
statuses,
|
||||
types,
|
||||
index_uids,
|
||||
canceled_by,
|
||||
before_enqueued_at,
|
||||
after_enqueued_at,
|
||||
before_started_at,
|
||||
after_started_at,
|
||||
before_finished_at,
|
||||
after_finished_at,
|
||||
} = query;
|
||||
|
||||
let mut batches = self.batches.all_batch_ids(rtxn)?;
|
||||
if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
batches.insert(batch_id);
|
||||
}
|
||||
|
||||
if let Some(from) = from {
|
||||
let range = if reverse.unwrap_or_default() {
|
||||
u32::MIN..*from
|
||||
} else {
|
||||
from.saturating_add(1)..u32::MAX
|
||||
};
|
||||
batches.remove_range(range);
|
||||
}
|
||||
|
||||
if let Some(batch_uids) = &batch_uids {
|
||||
let batches_uids = RoaringBitmap::from_iter(batch_uids);
|
||||
batches &= batches_uids;
|
||||
}
|
||||
|
||||
if let Some(status) = &statuses {
|
||||
let mut status_batches = RoaringBitmap::new();
|
||||
for status in status {
|
||||
match status {
|
||||
// special case for Processing batches
|
||||
Status::Processing => {
|
||||
if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
status_batches.insert(batch_id);
|
||||
}
|
||||
}
|
||||
// Enqueued tasks are not stored in batches
|
||||
Status::Enqueued => (),
|
||||
status => status_batches |= &self.batches.get_status(rtxn, *status)?,
|
||||
};
|
||||
}
|
||||
if !status.contains(&Status::Processing) {
|
||||
if let Some(ref batch) = processing.batch {
|
||||
batches.remove(batch.uid);
|
||||
}
|
||||
}
|
||||
batches &= status_batches;
|
||||
}
|
||||
|
||||
if let Some(task_uids) = &uids {
|
||||
let mut batches_by_task_uids = RoaringBitmap::new();
|
||||
for task_uid in task_uids {
|
||||
if let Some(task) = self.tasks.get_task(rtxn, *task_uid)? {
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
batches_by_task_uids.insert(batch_uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
batches &= batches_by_task_uids;
|
||||
}
|
||||
|
||||
// There is no database for this query, we must retrieve the task queried by the client and ensure it's valid
|
||||
if let Some(canceled_by) = &canceled_by {
|
||||
let mut all_canceled_batches = RoaringBitmap::new();
|
||||
for cancel_uid in canceled_by {
|
||||
if let Some(task) = self.tasks.get_task(rtxn, *cancel_uid)? {
|
||||
if task.kind.as_kind() == Kind::TaskCancelation
|
||||
&& task.status == Status::Succeeded
|
||||
{
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
all_canceled_batches.insert(batch_uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if the canceled_by has been specified but no batch
|
||||
// matches then we prefer matching zero than all batches.
|
||||
if all_canceled_batches.is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
} else {
|
||||
batches &= all_canceled_batches;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(kind) = &types {
|
||||
let mut kind_batches = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_batches |= self.batches.get_kind(rtxn, *kind)?;
|
||||
if let Some(uid) = processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.and_then(|batch| batch.kinds.contains(kind).then_some(batch.uid))
|
||||
{
|
||||
kind_batches.insert(uid);
|
||||
}
|
||||
}
|
||||
batches &= &kind_batches;
|
||||
}
|
||||
|
||||
if let Some(index) = &index_uids {
|
||||
let mut index_batches = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_batches |= self.batches.index_batches(rtxn, index)?;
|
||||
if let Some(uid) = processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.and_then(|batch| batch.indexes.contains(index).then_some(batch.uid))
|
||||
{
|
||||
index_batches.insert(uid);
|
||||
}
|
||||
}
|
||||
batches &= &index_batches;
|
||||
}
|
||||
|
||||
// For the started_at filter, we need to treat the part of the batches that are processing from the part of the
|
||||
// batches that are not processing. The non-processing ones are filtered normally while the processing ones
|
||||
// are entirely removed unless the in-memory startedAt variable falls within the date filter.
|
||||
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
|
||||
batches = {
|
||||
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
|
||||
(&batches - &*processing.processing, &batches & &*processing.processing);
|
||||
|
||||
// special case for Processing batches
|
||||
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
|
||||
let mut clear_filtered_processing_batches =
|
||||
|start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| {
|
||||
let start = map_bound(start, |b| b.unix_timestamp_nanos());
|
||||
let end = map_bound(end, |b| b.unix_timestamp_nanos());
|
||||
let is_within_dates = RangeBounds::contains(
|
||||
&(start, end),
|
||||
&processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at)
|
||||
.unix_timestamp_nanos(),
|
||||
);
|
||||
if !is_within_dates {
|
||||
filtered_processing_batches.clear();
|
||||
}
|
||||
};
|
||||
match (after_started_at, before_started_at) {
|
||||
(None, None) => (),
|
||||
(None, Some(before)) => {
|
||||
clear_filtered_processing_batches(Bound::Unbounded, Bound::Excluded(*before))
|
||||
}
|
||||
(Some(after), None) => {
|
||||
clear_filtered_processing_batches(Bound::Excluded(*after), Bound::Unbounded)
|
||||
}
|
||||
(Some(after), Some(before)) => clear_filtered_processing_batches(
|
||||
Bound::Excluded(*after),
|
||||
Bound::Excluded(*before),
|
||||
),
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut filtered_non_processing_batches,
|
||||
self.batches.started_at,
|
||||
*after_started_at,
|
||||
*before_started_at,
|
||||
)?;
|
||||
filtered_non_processing_batches | filtered_processing_batches
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut batches,
|
||||
self.batches.enqueued_at,
|
||||
*after_enqueued_at,
|
||||
*before_enqueued_at,
|
||||
)?;
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut batches,
|
||||
self.batches.finished_at,
|
||||
*after_finished_at,
|
||||
*before_finished_at,
|
||||
)?;
|
||||
|
||||
if let Some(limit) = limit {
|
||||
batches = if query.reverse.unwrap_or_default() {
|
||||
batches.into_iter().take(*limit as usize).collect()
|
||||
} else {
|
||||
batches.into_iter().rev().take(*limit as usize).collect()
|
||||
};
|
||||
}
|
||||
|
||||
Ok(batches)
|
||||
}
|
||||
|
||||
/// Return the batch ids matching the query along with the total number of batches
|
||||
/// by ignoring the from and limit parameters from the user's point of view.
|
||||
///
|
||||
/// There are two differences between an internal query and a query executed by
|
||||
/// the user.
|
||||
///
|
||||
/// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
|
||||
/// with many indexes internally.
|
||||
/// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
|
||||
pub(crate) fn get_batch_ids_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<(RoaringBitmap, u64)> {
|
||||
// compute all batches matching the filter by ignoring the limits, to find the number of batches matching
|
||||
// the filter.
|
||||
// As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares
|
||||
// us from modifying the underlying implementation, and the performance remains sufficient.
|
||||
// Should this change, we would modify `get_batch_ids` to directly return the number of matching batches.
|
||||
let total_batches =
|
||||
self.get_batch_ids(rtxn, &query.clone().without_limits(), processing)?;
|
||||
let mut batches = self.get_batch_ids(rtxn, query, processing)?;
|
||||
|
||||
// If the query contains a list of index uid or there is a finite list of authorized indexes,
|
||||
// then we must exclude all the batches that only contains tasks associated to multiple indexes.
|
||||
// This works because we don't autobatch tasks associated to multiple indexes with tasks associated
|
||||
// to a single index. e.g: IndexSwap cannot be batched with IndexCreation.
|
||||
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
|
||||
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
|
||||
batches -= self.tasks.get_kind(rtxn, kind)?;
|
||||
if let Some(batch) = processing.batch.as_ref() {
|
||||
if batch.kinds.contains(&kind) {
|
||||
batches.remove(batch.uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Any batch that is internally associated with at least one authorized index
|
||||
// must be returned.
|
||||
if !filters.all_indexes_authorized() {
|
||||
let mut valid_indexes = RoaringBitmap::new();
|
||||
let mut forbidden_indexes = RoaringBitmap::new();
|
||||
|
||||
let all_indexes_iter = self.batches.index_tasks.iter(rtxn)?;
|
||||
for result in all_indexes_iter {
|
||||
let (index, index_tasks) = result?;
|
||||
if filters.is_index_authorized(index) {
|
||||
valid_indexes |= index_tasks;
|
||||
} else {
|
||||
forbidden_indexes |= index_tasks;
|
||||
}
|
||||
}
|
||||
if let Some(batch) = processing.batch.as_ref() {
|
||||
for index in &batch.indexes {
|
||||
if filters.is_index_authorized(index) {
|
||||
valid_indexes.insert(batch.uid);
|
||||
} else {
|
||||
forbidden_indexes.insert(batch.uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a batch had ONE valid task then it should be returned
|
||||
let invalid_batches = forbidden_indexes - valid_indexes;
|
||||
|
||||
batches -= invalid_batches;
|
||||
}
|
||||
|
||||
Ok((batches, total_batches.len()))
|
||||
}
|
||||
|
||||
pub(crate) fn get_batches_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<(Vec<Batch>, u64)> {
|
||||
let (batches, total) =
|
||||
self.get_batch_ids_from_authorized_indexes(rtxn, query, filters, processing)?;
|
||||
let batches = if query.reverse.unwrap_or_default() {
|
||||
Box::new(batches.into_iter()) as Box<dyn Iterator<Item = u32>>
|
||||
} else {
|
||||
Box::new(batches.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
|
||||
};
|
||||
|
||||
let batches = self.batches.get_existing_batches(
|
||||
rtxn,
|
||||
batches.take(query.limit.unwrap_or(u32::MAX) as usize),
|
||||
processing,
|
||||
)?;
|
||||
|
||||
Ok((batches, total))
|
||||
}
|
||||
}
|
476
crates/index-scheduler/src/queue/batches_test.rs
Normal file
476
crates/index-scheduler/src/queue/batches_test.rs
Normal file
@ -0,0 +1,476 @@
|
||||
use meili_snap::snapshot;
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, FailureLocation};
|
||||
use crate::{IndexScheduler, Query};
|
||||
|
||||
#[test]
|
||||
fn query_batches_from_and_limit() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = index_creation_task("doggo", "bone");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
let kind = index_creation_task("whalo", "plankton");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
let kind = index_creation_task("catto", "his_own_vomit");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
|
||||
handle.advance_n_successful_batches(3);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
|
||||
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let query = Query { limit: Some(0), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query { limit: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query { limit: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query { from: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_simple() {
|
||||
let start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
|
||||
let (mut batches, _) = index_scheduler
|
||||
.get_batches_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
assert_eq!(batches.len(), 1);
|
||||
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
|
||||
assert!(batches[0].enqueued_at.is_some());
|
||||
batches[0].enqueued_at = None;
|
||||
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
|
||||
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
|
||||
snapshot!(batch, @r#"
|
||||
{
|
||||
"uid": 0,
|
||||
"details": {
|
||||
"primaryKey": "mouse"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
"status": {
|
||||
"processing": 1
|
||||
},
|
||||
"types": {
|
||||
"indexCreation": 1
|
||||
},
|
||||
"indexUids": {
|
||||
"catto": 1
|
||||
}
|
||||
},
|
||||
"startedAt": "1970-01-01T00:00:00Z",
|
||||
"finishedAt": null,
|
||||
"enqueuedAt": null
|
||||
}
|
||||
"#);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]"); // The batches don't contains any enqueued tasks
|
||||
|
||||
let query =
|
||||
Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]"); // both enqueued and processing tasks in the first tick
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test, which should excludes the enqueued tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should excludes all of them
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should exclude the enqueued tasks and include the only processing task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-advancing-a-bit");
|
||||
|
||||
let second_start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should include all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the second part of the test and before one minute after the
|
||||
// second start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// we run the same query to verify that, and indeed find that the last task is matched
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// enqueued, succeeded, or processing tasks started after the second part of the test, should
|
||||
// again only return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
handle.advance_till([ProcessBatchFailed, AfterProcessing]);
|
||||
|
||||
// now the last task should have failed
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// so running the last query should return nothing
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![1]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with an invalid uid
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![2]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with a valid uid
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_special_rules() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
|
||||
// associated with doggo -> empty result
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
drop(rtxn);
|
||||
// We're going to advance and process all the batches for the next query to actually hit the db
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
]);
|
||||
handle.advance_one_successful_batch();
|
||||
handle.advance_n_failed_batches(2);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-processing-everything");
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
|
||||
// -> only the index creation of doggo should be returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![
|
||||
IndexUidPattern::new_unchecked("catto"),
|
||||
IndexUidPattern::new_unchecked("doggo"),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
|
||||
// -> all tasks except the swap of catto with whalo are returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// we asked for all the tasks with all index authorized -> all tasks returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,3,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_canceled_by() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
let kind = KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_string(),
|
||||
tasks: [0, 1, 2, 3].into_iter().collect(),
|
||||
};
|
||||
let task_cancelation = index_scheduler.register(kind, None, false).unwrap();
|
||||
handle.advance_n_successful_batches(1);
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// The batch zero was the index creation task, the 1 is the task cancellation
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
// Return only 1 because the user is not authorized to see task 2
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
}
|
391
crates/index-scheduler/src/queue/mod.rs
Normal file
391
crates/index-scheduler/src/queue/mod.rs
Normal file
@ -0,0 +1,391 @@
|
||||
mod batches;
|
||||
#[cfg(test)]
|
||||
mod batches_test;
|
||||
mod tasks;
|
||||
#[cfg(test)]
|
||||
mod tasks_test;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File as StdFile;
|
||||
use std::time::Duration;
|
||||
|
||||
use file_store::FileStore;
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub(crate) use self::batches::BatchQueue;
|
||||
pub(crate) use self::tasks::TaskQueue;
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
check_index_swap_validity, filter_out_references_to_newer_tasks, ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, IndexSchedulerOptions, Result, TaskId};
|
||||
|
||||
/// The number of database used by queue itself
|
||||
const NUMBER_OF_DATABASES: u32 = 1;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping";
|
||||
}
|
||||
|
||||
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
|
||||
///
|
||||
/// An empty/default query (where each field is set to `None`) matches all tasks.
|
||||
/// Each non-null field restricts the set of tasks further.
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Query {
|
||||
/// The maximum number of tasks to be matched
|
||||
pub limit: Option<u32>,
|
||||
/// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched
|
||||
pub from: Option<u32>,
|
||||
/// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`.
|
||||
pub reverse: Option<bool>,
|
||||
/// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched
|
||||
pub uids: Option<Vec<TaskId>>,
|
||||
/// The [batch ids](`meilisearch_types::batches::Batch::uid`) to be matched
|
||||
pub batch_uids: Option<Vec<BatchId>>,
|
||||
/// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasls
|
||||
pub statuses: Option<Vec<Status>>,
|
||||
/// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks.
|
||||
///
|
||||
/// The kind of a task is given by:
|
||||
/// ```
|
||||
/// # use meilisearch_types::tasks::{Task, Kind};
|
||||
/// # fn doc_func(task: Task) -> Kind {
|
||||
/// task.kind.as_kind()
|
||||
/// # }
|
||||
/// ```
|
||||
pub types: Option<Vec<Kind>>,
|
||||
/// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks
|
||||
pub index_uids: Option<Vec<String>>,
|
||||
/// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::Task::Kind::TaskCancelation) tasks
|
||||
/// that canceled the matched tasks.
|
||||
pub canceled_by: Option<Vec<TaskId>>,
|
||||
/// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field.
|
||||
pub before_enqueued_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field.
|
||||
pub after_enqueued_at: Option<OffsetDateTime>,
|
||||
/// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field.
|
||||
pub before_started_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field.
|
||||
pub after_started_at: Option<OffsetDateTime>,
|
||||
/// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
|
||||
pub before_finished_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
|
||||
pub after_finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
impl Query {
|
||||
/// Return `true` if every field of the query is set to `None`, such that the query
|
||||
/// matches all tasks.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Query {
|
||||
limit: None,
|
||||
from: None,
|
||||
reverse: None,
|
||||
uids: None,
|
||||
batch_uids: None,
|
||||
statuses: None,
|
||||
types: None,
|
||||
index_uids: None,
|
||||
canceled_by: None,
|
||||
before_enqueued_at: None,
|
||||
after_enqueued_at: None,
|
||||
before_started_at: None,
|
||||
after_started_at: None,
|
||||
before_finished_at: None,
|
||||
after_finished_at: None,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes.
|
||||
pub fn with_index(self, index_uid: String) -> Self {
|
||||
let mut index_vec = self.index_uids.unwrap_or_default();
|
||||
index_vec.push(index_uid);
|
||||
Self { index_uids: Some(index_vec), ..self }
|
||||
}
|
||||
|
||||
// Removes the `from` and `limit` restrictions from the query.
|
||||
// Useful to get the total number of tasks matching a filter.
|
||||
pub fn without_limits(self) -> Self {
|
||||
Query { limit: None, from: None, ..self }
|
||||
}
|
||||
}
|
||||
|
||||
/// Structure which holds meilisearch's indexes and schedules the tasks
|
||||
/// to be performed on them.
|
||||
pub struct Queue {
|
||||
pub(crate) tasks: tasks::TaskQueue,
|
||||
pub(crate) batches: batches::BatchQueue,
|
||||
|
||||
/// Matches a batch id with the associated task ids.
|
||||
pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>,
|
||||
|
||||
/// The list of files referenced by the tasks.
|
||||
pub(crate) file_store: FileStore,
|
||||
|
||||
/// The max number of tasks allowed before the scheduler starts to delete
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
pub(crate) fn private_clone(&self) -> Queue {
|
||||
Queue {
|
||||
tasks: self.tasks.private_clone(),
|
||||
batches: self.batches.private_clone(),
|
||||
batch_to_tasks_mapping: self.batch_to_tasks_mapping,
|
||||
file_store: self.file_store.clone(),
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
tasks::TaskQueue::nb_db() + batches::BatchQueue::nb_db() + NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
/// Create an index scheduler and start its run loop.
|
||||
pub(crate) fn new(
|
||||
env: &Env<WithoutTls>,
|
||||
wtxn: &mut RwTxn,
|
||||
options: &IndexSchedulerOptions,
|
||||
) -> Result<Self> {
|
||||
// allow unreachable_code to get rids of the warning in the case of a test build.
|
||||
Ok(Self {
|
||||
file_store: FileStore::new(&options.update_file_path)?,
|
||||
batch_to_tasks_mapping: env
|
||||
.create_database(wtxn, Some(db_name::BATCH_TO_TASKS_MAPPING))?,
|
||||
tasks: TaskQueue::new(env, wtxn)?,
|
||||
batches: BatchQueue::new(env, wtxn)?,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the whole set of tasks that belongs to this batch.
|
||||
pub(crate) fn tasks_in_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<RoaringBitmap> {
|
||||
Ok(self.batch_to_tasks_mapping.get(rtxn, &batch_id)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of tasks and edit the `ProcessingBatch` to add the given tasks.
|
||||
///
|
||||
/// The tasks MUST exist, or a `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_tasks_for_processing_batch(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
processing_batch: &mut ProcessingBatch,
|
||||
tasks: impl IntoIterator<Item = TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|task_id| {
|
||||
let mut task = self
|
||||
.tasks
|
||||
.get_task(rtxn, task_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue));
|
||||
processing_batch.processing(&mut task);
|
||||
task
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
|
||||
pub(crate) fn write_batch(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
batch: ProcessingBatch,
|
||||
tasks: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
self.batch_to_tasks_mapping.put(wtxn, &batch.uid, tasks)?;
|
||||
self.batches.write_batch(wtxn, batch)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
|
||||
match task.content_uuid() {
|
||||
Some(content_file) => self.delete_update_file(content_file),
|
||||
None => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Open and returns the task's content File.
|
||||
pub fn update_file(&self, uuid: Uuid) -> file_store::Result<StdFile> {
|
||||
self.file_store.get_update(uuid)
|
||||
}
|
||||
|
||||
/// Delete a file from the index scheduler.
|
||||
///
|
||||
/// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.
|
||||
pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> {
|
||||
Ok(self.file_store.delete(uuid)?)
|
||||
}
|
||||
|
||||
/// Create a file and register it in the index scheduler.
|
||||
///
|
||||
/// The returned file and uuid can be used to associate
|
||||
/// some data to a task. The file will be kept until
|
||||
/// the task has been fully processed.
|
||||
pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> {
|
||||
if dry_run {
|
||||
Ok((Uuid::nil(), file_store::File::dry_file()?))
|
||||
} else {
|
||||
Ok(self.file_store.new_update()?)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> {
|
||||
Ok(self.file_store.new_update_with_uuid(uuid)?)
|
||||
}
|
||||
|
||||
/// The size on disk taken by all the updates files contained in the `IndexScheduler`, in bytes.
|
||||
pub fn compute_update_file_size(&self) -> Result<u64> {
|
||||
Ok(self.file_store.compute_total_size()?)
|
||||
}
|
||||
|
||||
pub fn register(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: &KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
let next_task_id = self.tasks.next_task_id(wtxn)?;
|
||||
|
||||
if let Some(uid) = task_id {
|
||||
if uid < next_task_id {
|
||||
return Err(Error::BadTaskId { received: uid, expected: next_task_id });
|
||||
}
|
||||
}
|
||||
|
||||
let mut task = Task {
|
||||
uid: task_id.unwrap_or(next_task_id),
|
||||
// The batch is defined once we starts processing the task
|
||||
batch_uid: None,
|
||||
enqueued_at: OffsetDateTime::now_utc(),
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
error: None,
|
||||
canceled_by: None,
|
||||
details: kind.default_details(),
|
||||
status: Status::Enqueued,
|
||||
kind: kind.clone(),
|
||||
};
|
||||
// For deletion and cancelation tasks, we want to make extra sure that they
|
||||
// don't attempt to delete/cancel tasks that are newer than themselves.
|
||||
filter_out_references_to_newer_tasks(&mut task);
|
||||
// If the register task is an index swap task, verify that it is well-formed
|
||||
// (that it does not contain duplicate indexes).
|
||||
check_index_swap_validity(&task)?;
|
||||
|
||||
// At this point the task is going to be registered and no further checks will be done
|
||||
if dry_run {
|
||||
return Ok(task);
|
||||
}
|
||||
|
||||
// Get rid of the mutability.
|
||||
let task = task;
|
||||
self.tasks.register(wtxn, &task)?;
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Register a task to cleanup the task queue if needed
|
||||
pub fn cleanup_task_queue(&self, wtxn: &mut RwTxn) -> Result<()> {
|
||||
let nb_tasks = self.tasks.all_task_ids(wtxn)?.len();
|
||||
// if we have less than 1M tasks everything is fine
|
||||
if nb_tasks < self.max_number_of_tasks as u64 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let finished = self.tasks.status.get(wtxn, &Status::Succeeded)?.unwrap_or_default()
|
||||
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
|
||||
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();
|
||||
|
||||
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
|
||||
|
||||
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
|
||||
// the deletion tasks we enqueued ourselves.
|
||||
if to_delete.len() < 2 {
|
||||
tracing::warn!("The task queue is almost full, but no task can be deleted yet.");
|
||||
// the only thing we can do is hope that the user tasks are going to finish
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"The task queue is almost full. Deleting the oldest {} finished tasks.",
|
||||
to_delete.len()
|
||||
);
|
||||
|
||||
// it's safe to unwrap here because we checked the len above
|
||||
let newest_task_id = to_delete.iter().last().unwrap();
|
||||
let last_task_to_delete =
|
||||
self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
// increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date.
|
||||
let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1);
|
||||
|
||||
self.register(
|
||||
wtxn,
|
||||
&KindWithContent::TaskDeletion {
|
||||
query: format!(
|
||||
"?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled",
|
||||
delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?,
|
||||
),
|
||||
tasks: to_delete,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_stats(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
|
||||
let mut res = BTreeMap::new();
|
||||
let processing_tasks = processing.processing.len();
|
||||
|
||||
res.insert(
|
||||
"statuses".to_string(),
|
||||
enum_iterator::all::<Status>()
|
||||
.map(|s| {
|
||||
let tasks = self.tasks.get_status(rtxn, s)?.len();
|
||||
match s {
|
||||
Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)),
|
||||
Status::Processing => Ok((s.to_string(), processing_tasks)),
|
||||
s => Ok((s.to_string(), tasks)),
|
||||
}
|
||||
})
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
res.insert(
|
||||
"types".to_string(),
|
||||
enum_iterator::all::<Kind>()
|
||||
.map(|s| Ok((s.to_string(), self.tasks.get_kind(rtxn, s)?.len())))
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
res.insert(
|
||||
"indexes".to_string(),
|
||||
self.tasks
|
||||
.index_tasks
|
||||
.iter(rtxn)?
|
||||
.map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?))
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,11 +1,11 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(1):
|
||||
[1,]
|
||||
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"doggo":2}}, }
|
||||
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user