Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-12-12 15:45:48 +00:00)

Compare commits: v1.28.2...openapi-co (145 commits)
Commit SHA1s:

5ebc0b4f32 550d916795 508987e87a 082efaad3b 035d9f03c1 26473df6c0 6b30a7a705 5914447b87
9a78786696 1d5eeaf74d a321a6f1e3 26e368b116 ba95ac0915 75fcbfc2fe 8c19b6d55e 08d0f05ece
4762e9afa0 12fcab91c5 792a72a23f 2dd7f29edf ff680d29a8 00420dfca0 a3a86ac629 f6210b8e5e
fe46af7ded 57b94b411f a7b6f65851 1ec6646d8c 2dccacf273 ce0f04e9ee 9ba5c6d371 56673fee56
b30bcbb931 5fbe4436c8 8fa253c293 4833da9edb c0e31a4f01 c06ffb31d1 3097314b9d 786a978237
03e53aaf6d 2206f045a4 246cf8b2d1 82adabc5a0 c9a22247d2 c535b8ddef 8e89619aed f617ca8e38
959175ad2a 341ffbf5ef 542f3073f4 0f134b079f 9e7ae47355 1edf07df29 88aa3cddde e6846cb55a
29b715e2f9 f28dc5bd2b 56d0b8ea54 514edb1b79 cfb609d41d 11cb062067 2ca4926ac5 834bd9b879
cac7e00983 e9300bac64 b0da7864a4 2b9d379feb 8d585a04d4 0095a72fba 651339648c a489f4c172
3b875ea00e 9d269c499c da35ae0a6e 61945b235d e936ac172d 162a84cdbf 92c63cf351 fca35b7476
4056657a55 685d227597 49b9f6ff38 79d0a3fb97 313ef7e79b 256407be61 8b3943bd32 87b972d29a
09ab61b360 2459f381b4 6442f02de4 91c4d9ea79 92a4091da3 29a337f0f9 8c3cebadaa b566458aa2
ae4344e359 b6cb384650 2c3e3d856c 93e97f814c e9350f033d 54c92fd6c0 4f4df83a51 a51021cab7
e33f4fdeae e407bca196 cd24ea11b4 ba578e7ab5 05a74d1e68 41d61deb97 bba292b01a 96923dff33
8f9c9305da a9f309e1d1 e456a9acd8 9b7d29466c b0ef14b6f0 36febe2068 6f14a6ec18 fce046d84d
3fc507bb44 fdbcd033fb aaab49baca 0d0d6e8099 c1e351c92b 67cab4cc9d f30a37b0fe a78a9f80dd
439fee5434 9e858590e0 29eebd5f93 07da6edbdf 22b83042e6 52ab13906a 29bec8efd4 6947a8990b
fbb2bb0c73 15918f53a9 d7f5f3a0a3 1afbf35f27 d7675233d5 c63c1ac32b 6171dcde0d 04bc134324
8ff39d927d
.github/ISSUE_TEMPLATE/new_feature_issue.md (vendored, 5 changes)

@@ -24,6 +24,11 @@ TBD
 - [ ] If not, add the `no db change` label to your PR, and you're good to merge.
 - [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining you what to do.
 
+### Reminders when adding features
+
+- [ ] Write unit tests using insta
+- [ ] Write declarative integration tests in [workloads/tests](https://github.com/meilisearch/meilisearch/tree/main/workloads/test). Specify the routes to call and then call `cargo xtask test workloads/tests/YOUR_TEST.json --update-responses` so that responses are automatically filled.
+
 ### Reminders when modifying the API
 
 - [ ] Update the openAPI file with utoipa:
.github/workflows/bench-manual.yml (vendored, 2 changes)

@@ -18,7 +18,7 @@ jobs:
     timeout-minutes: 180 # 3h
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.91.1
        with:
          profile: minimal
 
.github/workflows/bench-pr.yml (vendored, 4 changes)

@@ -66,9 +66,7 @@ jobs:
           fetch-depth: 0 # fetch full history to be able to get main commit sha
           ref: ${{ steps.comment-branch.outputs.head_ref }}
 
-      - uses: dtolnay/rust-toolchain@1.89
-        with:
-          profile: minimal
+      - uses: dtolnay/rust-toolchain@1.91.1
       - name: Run benchmarks on PR ${{ github.event.issue.id }}
         run: |
.github/workflows/bench-push-indexing.yml (vendored, 4 changes)

@@ -12,9 +12,7 @@ jobs:
     timeout-minutes: 180 # 3h
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
-        with:
-          profile: minimal
+      - uses: dtolnay/rust-toolchain@1.91.1
       # Run benchmarks
       - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
.github/workflows/benchmarks-manual.yml (vendored, 2 changes)

@@ -18,7 +18,7 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.91.1
        with:
          profile: minimal
 
.github/workflows/benchmarks-pr.yml (vendored, 2 changes)

@@ -44,7 +44,7 @@ jobs:
           exit 1
         fi
 
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.91.1
        with:
          profile: minimal
 
@@ -16,7 +16,7 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.91.1
        with:
          profile: minimal
 
@@ -15,7 +15,7 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.91.1
        with:
          profile: minimal
 
@@ -15,7 +15,7 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.91.1
        with:
          profile: minimal
 
@@ -15,7 +15,7 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.91.1
        with:
          profile: minimal
 
.github/workflows/db-change-comments.yml (vendored, 6 changes)

@@ -6,7 +6,7 @@ on:
 
 env:
   MESSAGE: |
     ### Hello, I'm a bot 🤖
 
     You are receiving this message because you declared that this PR make changes to the Meilisearch database.
     Depending on the nature of the change, additional actions might be required on your part. The following sections detail the additional actions depending on the nature of the change, please copy the relevant section in the description of your PR, and make sure to perform the required actions.
@@ -19,6 +19,7 @@ env:
 
     - [ ] Detail the change to the DB format and why they are forward compatible
     - [ ] Forward-compatibility: A database created before this PR and using the features touched by this PR was able to be opened by a Meilisearch produced by the code of this PR.
+    - [ ] Declarative test: add a [declarative test containing a dumpless upgrade](https://github.com/meilisearch/meilisearch/blob/main/TESTING.md#typical-usage)
 
 
     ## This PR makes breaking changes
@@ -35,8 +36,7 @@ env:
     - [ ] Write the code to go from the old database to the new one
       - If the change happened in milli, the upgrade function should be written and called [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/milli/src/update/upgrade/mod.rs#L24-L47)
       - If the change happened in the index-scheduler, we've never done it yet, but the right place to do it should be [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/index-scheduler/src/scheduler/process_upgrade/mod.rs#L13)
-    - [ ] Write an integration test [here](https://github.com/meilisearch/meilisearch/blob/main/crates/meilisearch/tests/upgrade/mod.rs) ensuring you can read the old database, upgrade to the new database, and read the new database as expected
-
+    - [ ] Declarative test: add a [declarative test containing a dumpless upgrade](https://github.com/meilisearch/meilisearch/blob/main/TESTING.md#typical-usage)
 
 jobs:
   add-comment:
.github/workflows/flaky-tests.yml (vendored, 10 changes)

@@ -3,7 +3,7 @@ name: Look for flaky tests
 on:
   workflow_dispatch:
   schedule:
-    - cron: '0 4 * * *' # Every day at 4:00AM
+    - cron: "0 4 * * *" # Every day at 4:00AM
 
 jobs:
   flaky:
@@ -13,11 +13,17 @@ jobs:
       image: ubuntu:22.04
     steps:
       - uses: actions/checkout@v5
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
+        run: |
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
       - name: Install needed dependencies
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.91.1
       - name: Install cargo-flaky
         run: cargo install cargo-flaky
       - name: Run cargo flaky in the dumps
.github/workflows/fuzzer-indexing.yml (vendored, 4 changes)

@@ -12,9 +12,7 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
-        with:
-          profile: minimal
+      - uses: dtolnay/rust-toolchain@1.91.1
       # Run benchmarks
       - name: Run the fuzzer
.github/workflows/publish-apt-brew-pkg.yml (vendored, 8 changes)

@@ -25,7 +25,13 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.89
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
+        run: |
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
+      - uses: dtolnay/rust-toolchain@1.91.1
       - name: Install cargo-deb
         run: cargo install cargo-deb
       - uses: actions/checkout@v5
.github/workflows/publish-docker-images.yml (vendored, 15 changes)

@@ -208,8 +208,8 @@ jobs:
           done
           cosign sign --yes ${images}
 
-      # /!\ Don't touch this without checking with Cloud team
-      - name: Send CI information to Cloud team
+      # /!\ Don't touch this without checking with engineers working on the Cloud code base on #discussion-engineering Slack channel
+      - name: Notify meilisearch-cloud
        # Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
        if: ${{ (github.event_name == 'push') && (matrix.edition == 'enterprise') }}
        uses: peter-evans/repository-dispatch@v3
@@ -218,3 +218,14 @@ jobs:
          repository: meilisearch/meilisearch-cloud
          event-type: cloud-docker-build
          client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }'
+
+      # /!\ Don't touch this without checking with integration team members on #discussion-integrations Slack channel
+      - name: Notify meilisearch-kubernetes
+        # Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event), or if not stable
+        if: ${{ github.event_name == 'push' && matrix.edition == 'community' && steps.check-tag-format.outputs.stable == 'true' }}
+        uses: peter-evans/repository-dispatch@v3
+        with:
+          token: ${{ secrets.MEILI_BOT_GH_PAT }}
+          repository: meilisearch/meilisearch-kubernetes
+          event-type: meilisearch-release
+          client-payload: '{ "version": "${{ github.ref_name }}" }'
.github/workflows/publish-release-assets.yml (vendored, 6 changes)

@@ -76,7 +76,7 @@ jobs:
     needs: check-version
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.91.1
       - name: Build
         run: cargo build --release --locked ${{ matrix.feature-flag }} ${{ matrix.extra-args }}
       # No need to upload binaries for dry run (cron or workflow_dispatch)
@@ -104,13 +104,13 @@ jobs:
       - name: Generate OpenAPI file
         run: |
           cd crates/openapi-generator
-          cargo run --release -- --pretty --output ../../meilisearch.json
+          cargo run --release -- --pretty --output ../../meilisearch-openapi.json
       - name: Upload OpenAPI to Release
         # No need to upload for dry run (cron or workflow_dispatch)
         if: github.event_name == 'release'
         uses: svenstaro/upload-release-action@2.11.2
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
-          file: ./meilisearch.json
+          file: ./meilisearch-openapi.json
           asset_name: meilisearch-openapi.json
           tag: ${{ github.ref }}
.github/workflows/sdks-tests.yml (vendored, 12 changes)

@@ -25,14 +25,18 @@ jobs:
       - uses: actions/checkout@v5
       - name: Define the Docker image we need to use
         id: define-image
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          DOCKER_IMAGE_INPUT: ${{ github.event.inputs.docker_image }}
         run: |
-          event=${{ github.event_name }}
           echo "docker-image=nightly" >> $GITHUB_OUTPUT
-          if [[ $event == 'workflow_dispatch' ]]; then
-            echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
+          if [[ "$EVENT_NAME" == 'workflow_dispatch' ]]; then
+            echo "docker-image=$DOCKER_IMAGE_INPUT" >> $GITHUB_OUTPUT
           fi
       - name: Docker image is ${{ steps.define-image.outputs.docker-image }}
-        run: echo "Docker image is ${{ steps.define-image.outputs.docker-image }}"
+        env:
+          DOCKER_IMAGE: ${{ steps.define-image.outputs.docker-image }}
+        run: echo "Docker image is $DOCKER_IMAGE"
 
   ##########
   ## SDKs ##
.github/workflows/test-suite.yml (vendored, 163 changes)

@@ -19,31 +19,36 @@ jobs:
     runs-on: ${{ matrix.runner }}
     strategy:
       matrix:
-        runner: [ubuntu-24.04, ubuntu-24.04-arm]
+        runner: [ubuntu-22.04, ubuntu-22.04-arm]
         features: ["", "--features enterprise"]
-    container:
-      # Use ubuntu-22.04 to compile with glibc 2.35
-      image: ubuntu:22.04
     steps:
       - uses: actions/checkout@v5
-      - name: Install needed dependencies
+      - name: check free space before
+        run: df -h
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
         run: |
-          apt-get update && apt-get install -y curl
-          apt-get install build-essential -y
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
+      - name: check free space after
+        run: df -h
       - name: Setup test with Rust stable
-        uses: dtolnay/rust-toolchain@1.89
+        uses: dtolnay/rust-toolchain@1.91.1
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.8.0
-      - name: Run cargo check without any default features
+        with:
+          key: ${{ matrix.features }}
+      - name: Run cargo build without any default features
         uses: actions-rs/cargo@v1
         with:
           command: build
-          args: --locked --release --no-default-features --all
+          args: --locked --no-default-features --all
       - name: Run cargo test
         uses: actions-rs/cargo@v1
         with:
           command: test
-          args: --locked --release --all ${{ matrix.features }}
+          args: --locked --all ${{ matrix.features }}
 
   test-others:
     name: Tests on ${{ matrix.os }}
@@ -53,53 +58,56 @@ jobs:
       matrix:
         os: [macos-14, windows-2022]
         features: ["", "--features enterprise"]
+    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
     steps:
       - uses: actions/checkout@v5
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.8.0
-      - uses: dtolnay/rust-toolchain@1.89
-      - name: Run cargo check without any default features
+      - uses: dtolnay/rust-toolchain@1.91.1
+      - name: Run cargo build without any default features
        uses: actions-rs/cargo@v1
        with:
          command: build
-          args: --locked --release --no-default-features --all
+          args: --locked --no-default-features --all
       - name: Run cargo test
        uses: actions-rs/cargo@v1
        with:
          command: test
-          args: --locked --release --all ${{ matrix.features }}
+          args: --locked --all ${{ matrix.features }}
 
   test-all-features:
     name: Tests almost all features
-    runs-on: ubuntu-latest
-    container:
-      # Use ubuntu-22.04 to compile with glibc 2.35
-      image: ubuntu:22.04
+    runs-on: ubuntu-22.04
     if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
     steps:
       - uses: actions/checkout@v5
-      - name: Install needed dependencies
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
         run: |
-          apt-get update
-          apt-get install --assume-yes build-essential curl
-      - uses: dtolnay/rust-toolchain@1.89
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
+      - uses: dtolnay/rust-toolchain@1.91.1
       - name: Run cargo build with almost all features
         run: |
-          cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
+          cargo build --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
       - name: Run cargo test with almost all features
         run: |
-          cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
+          cargo test --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
 
   ollama-ubuntu:
     name: Test with Ollama
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        features: ["", "--features enterprise"]
+    runs-on: ubuntu-22.04
     env:
       MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
     steps:
       - uses: actions/checkout@v5
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
+        run: |
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
       - name: Install Ollama
         run: |
           curl -fsSL https://ollama.com/install.sh | sudo -E sh
@@ -123,21 +131,21 @@ jobs:
         uses: actions-rs/cargo@v1
         with:
           command: test
-          args: --locked --release --all --features test-ollama ollama ${{ matrix.features }}
+          args: --locked -p meilisearch --features test-ollama ollama
 
   test-disabled-tokenization:
     name: Test disabled tokenization
-    runs-on: ubuntu-latest
-    container:
-      image: ubuntu:22.04
+    runs-on: ubuntu-22.04
     if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
     steps:
       - uses: actions/checkout@v5
-      - name: Install needed dependencies
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
         run: |
-          apt-get update
-          apt-get install --assume-yes build-essential curl
-      - uses: dtolnay/rust-toolchain@1.89
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
+      - uses: dtolnay/rust-toolchain@1.91.1
       - name: Run cargo tree without default features and check lindera is not present
         run: |
           if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -148,35 +156,39 @@ jobs:
         run: |
           cargo tree -f '{p} {f}' -e normal | grep lindera -qz
 
-  # We run tests in debug also, to make sure that the debug_assertions are hit
-  test-debug:
-    name: Run tests in debug
+  build:
+    name: Build in release
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v5
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
+        run: |
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
+      - uses: dtolnay/rust-toolchain@1.91.1
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2.8.0
+      - name: Build
+        run: cargo build --release --locked --target x86_64-unknown-linux-gnu
+
+  clippy:
+    name: Run Clippy
     runs-on: ubuntu-22.04
     strategy:
       matrix:
         features: ["", "--features enterprise"]
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
-      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.8.0
-      - name: Run tests in debug
-        uses: actions-rs/cargo@v1
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
+        run: |
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
+      - uses: dtolnay/rust-toolchain@1.91.1
         with:
-          command: test
-          args: --locked --all ${{ matrix.features }}
-
-  clippy:
-    name: Run Clippy
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        features: ["", "--features enterprise"]
-    steps:
-      - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
-        with:
-          profile: minimal
           components: clippy
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.8.0
@@ -188,14 +200,17 @@ jobs:
 
   fmt:
     name: Run Rustfmt
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
+        run: |
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
+      - uses: dtolnay/rust-toolchain@1.91.1
         with:
-          profile: minimal
-          toolchain: nightly-2024-07-09
-          override: true
           components: rustfmt
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.8.0
@@ -206,3 +221,23 @@ jobs:
         run: |
           echo -ne "\n" > crates/benchmarks/benches/datasets_paths.rs
           cargo fmt --all -- --check
+
+  declarative-tests:
+    name: Run declarative tests
+    runs-on: ubuntu-22.04-arm
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v5
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
+        run: |
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
+      - uses: dtolnay/rust-toolchain@1.91.1
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2.8.0
+      - name: Run declarative tests
+        run: |
+          cargo xtask test workloads/tests/*.json
.github/workflows/update-cargo-toml-version.yml (vendored, 10 changes)

@@ -18,9 +18,13 @@ jobs:
     runs-on: ubuntu-latest
     steps:
      - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
-        with:
-          profile: minimal
+      - name: Clean space as per https://github.com/actions/virtual-environments/issues/709
+        run: |
+          sudo rm -rf "/opt/ghc" || true
+          sudo rm -rf "/usr/share/dotnet" || true
+          sudo rm -rf "/usr/local/lib/android" || true
+          sudo rm -rf "/usr/local/share/boost" || true
+      - uses: dtolnay/rust-toolchain@1.91.1
       - name: Install sd
         run: cargo install sd
       - name: Update Cargo.toml file
.gitignore (vendored, 3 changes)

@@ -29,3 +29,6 @@ crates/meilisearch/db.snapshot
 
 # Fuzzcheck data for the facet indexing fuzz test
 crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
+
+# OpenAPI generator
+**/meilisearch-openapi.json
@@ -124,6 +124,7 @@ They are JSON files with the following structure (comments are not actually supp
 {
     // Name of the workload. Must be unique to the workload, as it will be used to group results on the dashboard.
     "name": "hackernews.ndjson_1M,no-threads",
+    "type": "bench",
     // Number of consecutive runs of the commands that should be performed.
     // Each run uses a fresh instance of Meilisearch and a fresh database.
     // Each run produces its own report file.
@@ -117,7 +117,7 @@ With swagger:
 With the internal crate:
 ```bash
 cd crates/openapi-generator
-cargo run --release -- --pretty --output meilisearch.json
+cargo run --release -- --pretty
 ```
 
 ### Logging
Cargo.lock (generated, 1073 changes)

File diff suppressed because it is too large.
@@ -23,7 +23,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.28.1"
+version = "1.29.0"
 authors = [
     "Quentin de Quelen <quentin@dequelen.me>",
     "Clément Renault <clement@meilisearch.com>",
TESTING.md (new file, 326 lines)

# Declarative tests

Declarative tests ensure that Meilisearch features remain stable across versions.

While we already have unit tests, those are run against **temporary databases** that are created fresh each time and therefore never risk corruption.

Declarative tests instead **simulate the lifetime of a database**: they chain together commands and requests to change the binary, verifying that database state and API responses remain consistent.

## Basic example

```jsonc
{
  "type": "test",
  "name": "api-keys",
  "binary": { // the first command will run on the binary following this specification.
    "source": "release", // get the binary as a release from GitHub
    "version": "1.19.0", // version to fetch
    "edition": "community" // edition to fetch
  },
  "commands": []
}
```

This example defines a no-op test (it does nothing).

If the file is saved at `workloads/tests/example.json`, you can run it with:

```bash
cargo xtask test workloads/tests/example.json
```

## Commands

Commands represent API requests sent to Meilisearch endpoints during a test.

They are executed sequentially, and their responses can be validated to ensure consistent behavior across upgrades.

```jsonc
{
  "route": "keys",
  "method": "POST",
  "body": {
    "inline": {
      "actions": [
        "search",
        "documents.add"
      ],
      "description": "Test API Key",
      "expiresAt": null,
      "indexes": [ "movies" ]
    }
  }
}
```

This command issues a `POST /keys` request, creating an API key with permissions to search and add documents in the `movies` index.

### Using assets in commands

To keep tests concise and reusable, you can define **assets** at the root of the workload file.

Assets are external data sources (such as datasets) that are cached between runs, making tests faster and easier to read.

```jsonc
{
  "type": "test",
  "name": "movies",
  "binary": {
    "source": "release",
    "version": "1.19.0",
    "edition": "community"
  },
  "assets": {
    "movies.json": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
      "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
    }
  },
  "commands": [
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": {
        "asset": "movies.json"
      }
    }
  ]
}
```

In this example:

- The `movies.json` dataset is defined as an asset, pointing to a remote URL.
- The SHA-256 checksum ensures integrity.
- The `POST /indexes/movies/documents` command uses this asset as the request body.

This makes the test much cleaner than inlining a large dataset directly into the command.

For asset handling, please refer to the [declarative benchmarks documentation](/BENCHMARKS.md#adding-new-assets).
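As an aside, the `sha256` value can be checked against a local copy of the dataset before it is referenced in a workload file. The sketch below is only an illustration and assumes the `sha2` and `hex` crates; it is not the xtask runner's own code.

```rust
// Illustration only: verify that a local asset matches the `sha256` declared in a
// workload file. Assumes the `sha2` and `hex` crates; not part of the test runner.
use sha2::{Digest, Sha256};
use std::fs;

fn asset_checksum(path: &str) -> std::io::Result<String> {
    let bytes = fs::read(path)?;
    Ok(hex::encode(Sha256::digest(&bytes)))
}

fn main() -> std::io::Result<()> {
    let expected = "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1";
    let actual = asset_checksum("movies.json")?;
    // A mismatch means the downloaded or cached file is not the dataset the test expects.
    assert_eq!(actual, expected, "checksum mismatch for movies.json");
    Ok(())
}
```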
### Asserting responses

Commands can specify both the **expected status code** and the **expected response body**.

```jsonc
{
  "route": "indexes/movies/documents",
  "method": "POST",
  "body": {
    "asset": "movies.json"
  },
  "expectedStatus": 202,
  "expectedResponse": {
    "enqueuedAt": "[timestamp]", // Set to a bracketed string to ignore the value
    "indexUid": "movies",
    "status": "enqueued",
    "taskUid": 1,
    "type": "documentAdditionOrUpdate"
  },
  "synchronous": "WaitForTask"
}
```

Manually writing `expectedResponse` fields can be tedious.

Instead, you can let the test runner populate them automatically:

```bash
# Run the workload to populate expected fields. Only adds the missing ones, doesn't change existing data
cargo xtask test workloads/tests/example.json --add-missing-responses

# OR

# Run the workload to populate expected fields. Updates all fields including existing ones
cargo xtask test workloads/tests/example.json --update-responses
```

This workflow is recommended:

1. Write the test without expected fields.
2. Run it with `--add-missing-responses` to capture the actual responses.
3. Review and commit the generated expectations.

## Changing binary

It is possible to insert an instruction to change the current Meilisearch instance from one binary specification to another during a test.

When executed, such an instruction will:

1. Stop the current Meilisearch instance.
2. Fetch the binary specified by the instruction.
3. Restart the server with the specified binary on the same database.

```jsonc
{
  "type": "test",
  "name": "movies",
  "binary": {
    "source": "release",
    "version": "1.19.0", // start with version v1.19.0
    "edition": "community"
  },
  "assets": {
    "movies.json": {
      "local_location": null,
      "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
      "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
    }
  },
  "commands": [
    // setup some data
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "body": {
        "asset": "movies.json"
      }
    },
    // switch binary to v1.24.0
    {
      "binary": {
        "source": "release",
        "version": "1.24.0",
        "edition": "community"
      }
    }
  ]
}
```

### Typical Usage

In most cases, the change binary instruction will be used to update a database.

- **Set up** some data using commands on an older version.
- **Upgrade** to the latest version.
- **Assert** that the data and API behavior remain correct after the upgrade.

To properly test the dumpless upgrade, one should typically:

1. Open the database without processing the update task: Use a `binary` instruction to switch to the desired version, passing `--experimental-dumpless-upgrade` and `--experimental-max-number-of-batched-tasks=0` as extra CLI arguments.
2. Check that the search, stats and task queue still work.
3. Open the database and process the update task: Use a `binary` instruction to switch to the desired version, passing `--experimental-dumpless-upgrade` as the extra CLI argument. Use a `health` command to wait for the upgrade task to finish.
4. Check that the indexing, search, stats, and task queue still work.

```jsonc
{
  "type": "test",
  "name": "movies",
  "binary": {
    "source": "release",
    "version": "1.12.0",
    "edition": "community"
  },
  "commands": [
    // 0. Run commands to populate the database
    {
      // ..
    },
    // 1. Open the database with new MS without processing the update task
    {
      "binary": {
        "source": "build", // build the binary from the sources in the current git repository
        "edition": "community",
        "extraCliArgs": [
          "--experimental-dumpless-upgrade", // allows to open with a newer MS
          "--experimental-max-number-of-batched-tasks=0" // prevent processing of the update task
        ]
      }
    },
    // 2. Check the search etc.
    {
      // ..
    },
    // 3. Open the database with new MS and processing the update task
    {
      "binary": {
        "source": "build", // build the binary from the sources in the current git repository
        "edition": "community",
        "extraCliArgs": [
          "--experimental-dumpless-upgrade" // allows to open with a newer MS
          // no `--experimental-max-number-of-batched-tasks=0`
        ]
      }
    },
    // 4. Check the indexing, search, etc.
    {
      // ..
    }
  ]
}
```

This ensures backward compatibility: databases created with older Meilisearch versions should remain functional and consistent after an upgrade.

## Variables

Sometimes a command needs to use a value returned by a **previous response**.
These values can be captured and reused using the `register` field.

```jsonc
{
  "route": "keys",
  "method": "POST",
  "body": {
    "inline": {
      "actions": [
        "search",
        "documents.add"
      ],
      "description": "Test API Key",
      "expiresAt": null,
      "indexes": [ "movies" ]
    }
  },
  "expectedResponse": {
    "key": "c6f64630bad2996b1f675007c8800168e14adf5d6a7bb1a400a6d2b158050eaf",
    // ...
  },
  "register": {
    "key": "/key"
  },
  "synchronous": "WaitForResponse"
}
```

The `register` field captures the value at the JSON path `/key` from the response.
Paths follow the **JavaScript Object Notation Pointer (RFC 6901)** format.
Registered variables are available for all subsequent commands.
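To make the pointer syntax concrete, here is a standalone illustration using `serde_json`, which implements RFC 6901 pointers; this is not the test runner's own code.

```rust
// Illustration only: resolving an RFC 6901 pointer against a response body.
use serde_json::{json, Value};

fn main() {
    let response: Value = json!({
        "key": "c6f64630bad2996b1f675007c8800168e14adf5d6a7bb1a400a6d2b158050eaf",
        "actions": ["search", "documents.add"]
    });

    // "/key" selects the top-level `key` field; "/actions/0" would select "search".
    let captured = response.pointer("/key").and_then(Value::as_str).unwrap();
    assert!(captured.starts_with("c6f64630"));
}
```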
Registered variables can be referenced by wrapping their name in double curly braces:

In the route/path:

```jsonc
{
  "route": "tasks/{{ task_id }}",
  "method": "GET"
}
```

In the request body:

```jsonc
{
  "route": "indexes/movies/documents",
  "method": "PATCH",
  "body": {
    "inline": {
      "id": "{{ document_id }}",
      "overview": "Shazam turns evil and the world is in danger.",
    }
  }
}
```

Or they can be referenced by their name (**without curly braces**) as an API key:

```jsonc
{
  "route": "indexes/movies/documents",
  "method": "POST",
  "body": { /* ... */ },
  "apiKeyVariable": "key" // The **content** of the key variable will be used as an API key
}
```
@@ -21,6 +21,10 @@ use roaring::RoaringBitmap;
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 
+fn no_cancel() -> bool {
+    false
+}
+
 const BENCHMARK_ITERATION: usize = 10;
 
 fn setup_dir(path: impl AsRef<Path>) {
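The shape of this refactor, sketched with stand-in names (the real milli signatures are not shown in this hunk): any `Fn() -> bool` satisfies a cancellation-callback parameter, so one named function can replace the `&|| false` closure literal repeated at every call site in the hunks below.

```rust
// Generic sketch of the pattern, not milli's actual API: `run_indexing` and its
// parameter are stand-ins used only to show why `&no_cancel` is interchangeable
// with `&|| false`.
fn no_cancel() -> bool {
    false
}

fn run_indexing(should_abort: &impl Fn() -> bool) {
    // A real indexer would poll this periodically and stop early when it returns true.
    if should_abort() {
        return;
    }
    // ... indexing work ...
}

fn main() {
    run_indexing(&|| false);  // before: a fresh closure literal at each call site
    run_indexing(&no_cancel); // after: a single named function shared everywhere
}
```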
@@ -65,7 +69,7 @@ fn setup_settings<'t>(
     let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
     builder.set_sortable_fields(sortable_fields);
 
-    builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
+    builder.execute(&no_cancel, &Progress::default(), Default::default()).unwrap();
 }
 
 fn setup_index_with_settings(
@@ -152,7 +156,7 @@ fn indexing_songs_default(c: &mut Criterion) {
                 &rtxn,
                 None,
                 &mut new_fields_ids_map,
-                &|| false,
+                &no_cancel,
                 Progress::default(),
                 None,
             )
@@ -168,7 +172,7 @@ fn indexing_songs_default(c: &mut Criterion) {
                 primary_key,
                 &document_changes,
                 RuntimeEmbedders::default(),
-                &|| false,
+                &no_cancel,
                 &Progress::default(),
                 &Default::default(),
             )
@@ -220,7 +224,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -236,7 +240,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -266,7 +270,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -282,7 +286,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -336,7 +340,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -352,7 +356,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -414,7 +418,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -430,7 +434,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -460,7 +464,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -476,7 +480,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -502,7 +506,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -518,7 +522,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -571,7 +575,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -587,7 +591,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -639,7 +643,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -655,7 +659,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -707,7 +711,7 @@ fn indexing_wiki(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -723,7 +727,7 @@ fn indexing_wiki(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -774,7 +778,7 @@ fn reindexing_wiki(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -790,7 +794,7 @@ fn reindexing_wiki(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -820,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -836,7 +840,7 @@ fn reindexing_wiki(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -889,7 +893,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -905,7 +909,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -967,7 +971,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -983,7 +987,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -1014,7 +1018,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -1030,7 +1034,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -1057,7 +1061,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
-                &|| false,
+                &no_cancel,
@@ -1073,7 +1077,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
                 primary_key,
                 &document_changes,
                 RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1125,7 +1129,7 @@ fn indexing_movies_default(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1141,7 +1145,7 @@ fn indexing_movies_default(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1192,7 +1196,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1208,7 +1212,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1238,7 +1242,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1254,7 +1258,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1307,7 +1311,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1323,7 +1327,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1372,7 +1376,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
|
|||||||
Some(primary_key),
|
Some(primary_key),
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1422,7 +1426,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1438,7 +1442,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1468,7 +1472,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1484,7 +1488,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1510,7 +1514,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1526,7 +1530,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1601,7 +1605,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1617,7 +1621,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1693,7 +1697,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1709,7 +1713,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1777,7 +1781,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1793,7 +1797,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1845,7 +1849,7 @@ fn indexing_geo(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1861,7 +1865,7 @@ fn indexing_geo(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1912,7 +1916,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1928,7 +1932,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -1958,7 +1962,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -1974,7 +1978,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
@@ -2027,7 +2031,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
|||||||
&rtxn,
|
&rtxn,
|
||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&no_cancel,
|
||||||
Progress::default(),
|
Progress::default(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
@@ -2043,7 +2047,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
|||||||
primary_key,
|
primary_key,
|
||||||
&document_changes,
|
&document_changes,
|
||||||
RuntimeEmbedders::default(),
|
RuntimeEmbedders::default(),
|
||||||
&|| false,
|
&no_cancel,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
&Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
|
|||||||
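The hunks above all make the same substitution: the ad-hoc `&|| false` cancellation closure passed to the indexing calls is replaced by a shared `no_cancel` binding. A minimal sketch of the pattern, with a hypothetical `index_documents` helper standing in for the real indexing entry points (the actual `no_cancel` definition sits outside this excerpt):

fn main() {
    // Hypothetical stand-in for the indexing entry points used in the benches.
    fn index_documents(should_abort: &dyn Fn() -> bool) {
        if should_abort() {
            return;
        }
        // ... indexing work would happen here ...
    }

    // Before: every call site built a fresh closure literal.
    index_documents(&|| false);

    // After: one named `no_cancel` binding is shared by all call sites,
    // which is what the `&no_cancel` argument in the hunks above refers to.
    let no_cancel = || false;
    index_documents(&no_cancel);
}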
@@ -15,4 +15,4 @@ time = { version = "0.3.44", features = ["parsing"] }

 [build-dependencies]
 anyhow = "1.0.100"
-vergen-git2 = "1.0.7"
+vergen-gitcl = "1.0.8"
@@ -15,7 +15,7 @@ fn emit_git_variables() -> anyhow::Result<()> {
     // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
     // in the corresponding GitHub workflow (publish_docker.yml).
     // This is due to the Dockerfile building the binary outside of the git directory.
-    let mut builder = vergen_git2::Git2Builder::default();
+    let mut builder = vergen_gitcl::GitclBuilder::default();

     builder.branch(true);
     builder.commit_timestamp(true);

@@ -25,5 +25,5 @@ fn emit_git_variables() -> anyhow::Result<()> {

     let git2 = builder.build()?;

-    vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
+    vergen_gitcl::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
 }
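The build script now goes through the git-CLI backend of vergen (vergen-gitcl) instead of the libgit2 one, while the emitted variables keep their VERGEN_* names. A small sketch, not taken from this repository, of how such variables are typically consumed at compile time:

// Sketch only: reading the VERGEN_* variables that a build script like the one
// above emits. `option_env!` keeps the build working when a variable is missing
// (for example when building outside a git checkout).
fn main() {
    let branch = option_env!("VERGEN_GIT_BRANCH").unwrap_or("unknown");
    let commit_ts = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
    println!("built from branch {branch} at commit timestamp {commit_ts}");
}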
crates/build-info/src/main.rs (new file, 6 lines)
@@ -0,0 +1,6 @@
+use build_info::BuildInfo;
+
+fn main() {
+    let info = BuildInfo::from_build();
+    dbg!(info);
+}
@@ -107,19 +107,14 @@ impl Settings<Unchecked> {
     }
 }

-#[derive(Debug, Clone, PartialEq)]
+#[derive(Default, Debug, Clone, PartialEq)]
 pub enum Setting<T> {
     Set(T),
     Reset,
+    #[default]
     NotSet,
 }

-impl<T> Default for Setting<T> {
-    fn default() -> Self {
-        Self::NotSet
-    }
-}
-
 impl<T> Setting<T> {
     pub const fn is_not_set(&self) -> bool {
         matches!(self, Self::NotSet)
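The manual `impl<T> Default for Setting<T>` blocks removed in these hunks are replaced by the derived form with a `#[default]` attribute on the unit variant. A self-contained sketch of the same pattern:

#[derive(Default, Debug, PartialEq)]
enum Setting<T> {
    #[allow(dead_code)]
    Set(T),
    #[allow(dead_code)]
    Reset,
    #[default]
    NotSet,
}

fn main() {
    // The derived impl returns the variant marked #[default].
    assert_eq!(Setting::<u32>::default(), Setting::NotSet);
}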
@@ -161,19 +161,14 @@ pub struct Facets {
     pub min_level_size: Option<NonZeroUsize>,
 }

-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Default, Debug, Clone, PartialEq, Eq)]
 pub enum Setting<T> {
     Set(T),
     Reset,
+    #[default]
     NotSet,
 }

-impl<T> Default for Setting<T> {
-    fn default() -> Self {
-        Self::NotSet
-    }
-}
-
 impl<T> Setting<T> {
     pub fn map<U, F>(self, f: F) -> Setting<U>
     where
@@ -1,9 +1,7 @@
 use std::fmt::{self, Display, Formatter};
-use std::marker::PhantomData;
 use std::str::FromStr;

-use serde::de::Visitor;
-use serde::{Deserialize, Deserializer};
+use serde::Deserialize;
 use uuid::Uuid;

 use super::settings::{Settings, Unchecked};

@@ -82,59 +80,3 @@ impl Display for IndexUidFormatError {
 }

 impl std::error::Error for IndexUidFormatError {}
-
-/// A type that tries to match either a star (*) or
-/// any other thing that implements `FromStr`.
-#[derive(Debug)]
-#[cfg_attr(test, derive(serde::Serialize))]
-pub enum StarOr<T> {
-    Star,
-    Other(T),
-}
-
-impl<'de, T, E> Deserialize<'de> for StarOr<T>
-where
-    T: FromStr<Err = E>,
-    E: Display,
-{
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        /// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag.
-        /// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to
-        /// deserialize everything as a `StarOr::Other`, including "*".
-        /// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is
-        /// not supported on untagged enums.
-        struct StarOrVisitor<T>(PhantomData<T>);
-
-        impl<T, FE> Visitor<'_> for StarOrVisitor<T>
-        where
-            T: FromStr<Err = FE>,
-            FE: Display,
-        {
-            type Value = StarOr<T>;
-
-            fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
-                formatter.write_str("a string")
-            }
-
-            fn visit_str<SE>(self, v: &str) -> Result<Self::Value, SE>
-            where
-                SE: serde::de::Error,
-            {
-                match v {
-                    "*" => Ok(StarOr::Star),
-                    v => {
-                        let other = FromStr::from_str(v).map_err(|e: T::Err| {
-                            SE::custom(format!("Invalid `other` value: {}", e))
-                        })?;
-                        Ok(StarOr::Other(other))
-                    }
-                }
-            }
-        }
-
-        deserializer.deserialize_str(StarOrVisitor(PhantomData))
-    }
-}
@@ -192,19 +192,14 @@ pub struct Facets {
     pub min_level_size: Option<NonZeroUsize>,
 }

-#[derive(Debug, Clone, PartialEq, Eq, Copy)]
+#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)]
 pub enum Setting<T> {
     Set(T),
     Reset,
+    #[default]
     NotSet,
 }

-impl<T> Default for Setting<T> {
-    fn default() -> Self {
-        Self::NotSet
-    }
-}
-
 impl<T> Setting<T> {
     pub fn set(self) -> Option<T> {
         match self {
@@ -47,20 +47,15 @@ pub struct Settings<T> {
     pub _kind: PhantomData<T>,
 }

-#[derive(Debug, Clone, PartialEq, Eq, Copy)]
+#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)]
 #[cfg_attr(test, derive(serde::Serialize))]
 pub enum Setting<T> {
     Set(T),
     Reset,
+    #[default]
     NotSet,
 }

-impl<T> Default for Setting<T> {
-    fn default() -> Self {
-        Self::NotSet
-    }
-}
-
 impl<T> Setting<T> {
     pub fn set(self) -> Option<T> {
         match self {
@@ -322,7 +322,7 @@ impl From<Task> for TaskView {
             _ => None,
         });

-        let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts));
+        let duration = finished_at.zip(started_at).map(|(tf, ts)| tf - ts);

         Self {
             uid: id,
@@ -6,7 +6,7 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
 use meilisearch_types::heed::{Database, RoTxn};
 use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Details, Kind, Status, Task};
-use meilisearch_types::versioning;
+use meilisearch_types::versioning::{self, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
 use roaring::RoaringBitmap;

 use crate::index_mapper::IndexMapper;

@@ -320,7 +320,11 @@ fn snapshot_details(d: &Details) -> String {
             format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
         }
         Details::UpgradeDatabase { from, to } => {
-            format!("{{ from: {from:?}, to: {to:?} }}")
+            if to == &(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) {
+                format!("{{ from: {from:?}, to: [current version] }}")
+            } else {
+                format!("{{ from: {from:?}, to: {to:?} }}")
+            }
         }
         Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
             format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")

@@ -400,7 +404,21 @@ pub fn snapshot_batch(batch: &Batch) -> String {

     snap.push('{');
     snap.push_str(&format!("uid: {uid}, "));
-    snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
+    let details = if let Some(upgrade_to) = &details.upgrade_to {
+        if upgrade_to.as_str()
+            == format!("v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}").as_str()
+        {
+            let mut details = details.clone();
+
+            details.upgrade_to = Some("[current version]".into());
+            serde_json::to_string(&details).unwrap()
+        } else {
+            serde_json::to_string(details).unwrap()
+        }
+    } else {
+        serde_json::to_string(details).unwrap()
+    };
+    snap.push_str(&format!("details: {details}, "));
     snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
     if !embedder_stats.skip_serializing() {
         snap.push_str(&format!(
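Both snapshot helpers above now print `[current version]` whenever the upgrade target equals the running binary's version, so the stored snapshots no longer churn on every release. A simplified sketch of the redaction, with a hypothetical `current_version()` helper standing in for the real VERSION_* constants:

// Hypothetical helper standing in for (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH).
fn current_version() -> (u32, u32, u32) {
    (1, 28, 1)
}

// Redact the target version when it matches the running binary.
fn render_upgrade_to(to: (u32, u32, u32)) -> String {
    if to == current_version() {
        "[current version]".to_string()
    } else {
        format!("{to:?}")
    }
}

fn main() {
    assert_eq!(render_upgrade_to((1, 28, 1)), "[current version]");
    assert_eq!(render_upgrade_to((1, 12, 0)), "(1, 12, 0)");
}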
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}

@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
 [timestamp] [4,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
 2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
 3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:

@@ -37,7 +37,7 @@ catto [1,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 ----------------------------------------------------------------------

@@ -40,7 +40,7 @@ doggo [2,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}

@@ -43,7 +43,7 @@ doggo [2,3,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
@@ -1,7 +1,7 @@
 use anyhow::bail;
 use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
 use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
-use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
+use meilisearch_types::versioning;
 use time::OffsetDateTime;
 use tracing::info;

@@ -9,83 +9,82 @@ use crate::queue::TaskQueue;
 use crate::versioning::Versioning;

 trait UpgradeIndexScheduler {
-    fn upgrade(
-        &self,
-        env: &Env<WithoutTls>,
-        wtxn: &mut RwTxn,
-        original: (u32, u32, u32),
-    ) -> anyhow::Result<()>;
-    fn target_version(&self) -> (u32, u32, u32);
+    fn upgrade(&self, env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> anyhow::Result<()>;
+    /// Whether the migration should be applied, depending on the initial version of the index scheduler before
+    /// any migration was applied
+    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
+    /// A progress-centric description of the migration
+    fn description(&self) -> &'static str;
 }

+/// Upgrade the index scheduler to the binary version.
+///
+/// # Warning
+///
+/// The current implementation uses a single wtxn to the index scheduler for the whole duration of the upgrade.
+/// If migrations start taking take a long time, it might prevent tasks from being registered.
+/// If this issue manifests, then it can be mitigated by adding a `fn target_version` to `UpgradeIndexScheduler`,
+/// to be able to write intermediate versions and drop the wtxn between applying migrations.
 pub fn upgrade_index_scheduler(
     env: &Env<WithoutTls>,
     versioning: &Versioning,
-    from: (u32, u32, u32),
-    to: (u32, u32, u32),
+    initial_version: (u32, u32, u32),
 ) -> anyhow::Result<()> {
-    let current_major = to.0;
-    let current_minor = to.1;
-    let current_patch = to.2;
-
-    let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
-        // This is the last upgrade function, it will be called when the index is up to date.
-        // any other upgrade function should be added before this one.
-        &ToCurrentNoOp {},
-    ];
-
-    let start = match from {
-        (1, 12, _) => 0,
-        (1, 13, _) => 0,
-        (1, 14, _) => 0,
-        (1, 15, _) => 0,
-        (1, 16, _) => 0,
-        (1, 17, _) => 0,
-        (1, 18, _) => 0,
-        (1, 19, _) => 0,
-        (1, 20, _) => 0,
-        (1, 21, _) => 0,
-        (1, 22, _) => 0,
-        (1, 23, _) => 0,
-        (1, 24, _) => 0,
-        (1, 25, _) => 0,
-        (1, 26, _) => 0,
-        (1, 27, _) => 0,
-        (1, 28, _) => 0,
-        (major, minor, patch) => {
-            if major > current_major
-                || (major == current_major && minor > current_minor)
-                || (major == current_major && minor == current_minor && patch > current_patch)
-            {
-                bail!(
-                    "Database version {major}.{minor}.{patch} is higher than the Meilisearch version {current_major}.{current_minor}.{current_patch}. Downgrade is not supported",
-                );
-            } else if major < 1 || (major == current_major && minor < 12) {
-                bail!(
-                    "Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{current_major}.{current_minor}.{current_patch}",
-                );
-            } else {
-                bail!("Unknown database version: v{major}.{minor}.{patch}");
-            }
-        }
-    };
-
-    info!("Upgrading the task queue");
-    let mut local_from = from;
-    for upgrade in upgrade_functions[start..].iter() {
-        let target = upgrade.target_version();
-        info!(
-            "Upgrading from v{}.{}.{} to v{}.{}.{}",
-            local_from.0, local_from.1, local_from.2, target.0, target.1, target.2
-        );
-        let mut wtxn = env.write_txn()?;
-        upgrade.upgrade(env, &mut wtxn, local_from)?;
-        versioning.set_version(&mut wtxn, target)?;
-        wtxn.commit()?;
-        local_from = target;
+    let target_major: u32 = versioning::VERSION_MAJOR;
+    let target_minor: u32 = versioning::VERSION_MINOR;
+    let target_patch: u32 = versioning::VERSION_PATCH;
+    let target_version = (target_major, target_minor, target_patch);
+
+    if initial_version == target_version {
+        return Ok(());
     }

+    let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
+        // List all upgrade functions to apply in order here.
+    ];
+
+    let (initial_major, initial_minor, initial_patch) = initial_version;
+
+    if initial_version > target_version {
+        bail!(
+            "Database version {initial_major}.{initial_minor}.{initial_patch} is higher than the Meilisearch version {target_major}.{target_minor}.{target_patch}. Downgrade is not supported",
+        );
+    }
+
+    if initial_version < (1, 12, 0) {
+        bail!(
+            "Database version {initial_major}.{initial_minor}.{initial_patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{initial_major}.{initial_minor}.{initial_patch} and import it in the v{target_major}.{target_minor}.{target_patch}",
+        );
+    }
+
+    info!("Upgrading the task queue");
     let mut wtxn = env.write_txn()?;
+    let migration_count = upgrade_functions.len();
+    for (migration_index, upgrade) in upgrade_functions.iter().enumerate() {
+        if upgrade.must_upgrade(initial_version) {
+            info!(
+                "[{migration_index}/{migration_count}]Applying migration: {}",
+                upgrade.description()
+            );
+
+            upgrade.upgrade(env, &mut wtxn)?;
+
+            info!(
+                "[{}/{migration_count}]Migration applied: {}",
+                migration_index + 1,
+                upgrade.description()
+            )
+        } else {
+            info!(
+                "[{migration_index}/{migration_count}]Skipping unnecessary migration: {}",
+                upgrade.description()
+            )
+        }
+    }
+
+    versioning.set_version(&mut wtxn, target_version)?;
+    info!("Task queue upgraded, spawning the upgrade database task");
+
     let queue = TaskQueue::new(env, &mut wtxn)?;
     let uid = queue.next_task_id(&wtxn)?;
     queue.register(

@@ -98,9 +97,9 @@ pub fn upgrade_index_scheduler(
             finished_at: None,
             error: None,
             canceled_by: None,
-            details: Some(Details::UpgradeDatabase { from, to }),
+            details: Some(Details::UpgradeDatabase { from: initial_version, to: target_version }),
             status: Status::Enqueued,
-            kind: KindWithContent::UpgradeDatabase { from },
+            kind: KindWithContent::UpgradeDatabase { from: initial_version },
             network: None,
             custom_metadata: None,
         },

@@ -109,21 +108,3 @@ pub fn upgrade_index_scheduler(

     Ok(())
 }
-
-#[allow(non_camel_case_types)]
-struct ToCurrentNoOp {}
-
-impl UpgradeIndexScheduler for ToCurrentNoOp {
-    fn upgrade(
-        &self,
-        _env: &Env<WithoutTls>,
-        _wtxn: &mut RwTxn,
-        _original: (u32, u32, u32),
-    ) -> anyhow::Result<()> {
-        Ok(())
-    }
-
-    fn target_version(&self) -> (u32, u32, u32) {
-        (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
-    }
-}
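With the refactor above, a migration is a type implementing `UpgradeIndexScheduler`: it says whether it applies to a given initial version, describes itself, and performs the upgrade. A hypothetical future migration might look like the sketch below; the heed environment and transaction parameters are elided so the sketch stays self-contained, and the anyhow crate is assumed as a dependency:

// Hypothetical migration and simplified trait, for illustration only.
trait UpgradeIndexScheduler {
    fn upgrade(&self) -> anyhow::Result<()>;
    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
    fn description(&self) -> &'static str;
}

struct RenameSomeKey;

impl UpgradeIndexScheduler for RenameSomeKey {
    fn upgrade(&self) -> anyhow::Result<()> {
        // ... rewrite the affected entries here ...
        Ok(())
    }

    // Only databases created before this hypothetical 1.29 change would need it.
    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
        initial_version < (1, 29, 0)
    }

    fn description(&self) -> &'static str {
        "rename some key"
    }
}

fn main() -> anyhow::Result<()> {
    let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[&RenameSomeKey];
    for upgrade in upgrade_functions {
        if upgrade.must_upgrade((1, 12, 0)) {
            println!("applying: {}", upgrade.description());
            upgrade.upgrade()?;
        }
    }
    Ok(())
}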
@@ -64,14 +64,7 @@ impl Versioning {
         };
         wtxn.commit()?;

-        let bin_major: u32 = versioning::VERSION_MAJOR;
-        let bin_minor: u32 = versioning::VERSION_MINOR;
-        let bin_patch: u32 = versioning::VERSION_PATCH;
-        let to = (bin_major, bin_minor, bin_patch);
-
-        if from != to {
-            upgrade_index_scheduler(env, &this, from, to)?;
-        }
+        upgrade_index_scheduler(env, &this, from)?;

         // Once we reach this point it means the upgrade process, if there was one is entirely finished
         // we can safely say we reached the latest version of the index scheduler
@@ -1,6 +1,7 @@
-use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;

+use serde::{Deserialize, Serialize};
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct Network {
@@ -1,7 +1,7 @@
 use std::any::TypeId;
 use std::collections::{HashMap, HashSet};
 use std::fs;
-use std::path::{Path, PathBuf};
+use std::path::Path;
 use std::sync::Arc;
 use std::time::{Duration, Instant};

@@ -344,14 +344,14 @@ impl Infos {
             experimental_no_edition_2024_for_dumps,
             experimental_vector_store_setting: vector_store_setting,
             gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
-            db_path: db_path != PathBuf::from("./data.ms"),
+            db_path: db_path != Path::new("./data.ms"),
             import_dump: import_dump.is_some(),
-            dump_dir: dump_dir != PathBuf::from("dumps/"),
+            dump_dir: dump_dir != Path::new("dumps/"),
             ignore_missing_dump,
             ignore_dump_if_db_exists,
             import_snapshot: import_snapshot.is_some(),
             schedule_snapshot,
-            snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
+            snapshot_dir: snapshot_dir != Path::new("snapshots/"),
             uses_s3_snapshots: s3_snapshot_options.is_some(),
             ignore_missing_snapshot,
             ignore_snapshot_if_db_exists,
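Comparing against `Path::new(...)` borrows the default location instead of allocating a fresh `PathBuf` for every comparison; both forms compare equal thanks to the `PartialEq` impls between `PathBuf` and `&Path`. A small sketch:

use std::path::{Path, PathBuf};

fn main() {
    let db_path = PathBuf::from("./data.ms");

    // Allocates a PathBuf just to compare against it:
    let changed_alloc = db_path != PathBuf::from("./data.ms");

    // Borrows a &Path instead; no allocation, same result:
    let changed_borrow = db_path != Path::new("./data.ms");

    assert_eq!(changed_alloc, changed_borrow);
}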
@@ -34,7 +34,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 /// Compact an index
 #[utoipa::path(
     post,
-    path = "{indexUid}/compact",
+    path = "/{indexUid}/compact",
     tag = "Compact an index",
     security(("Bearer" = ["search", "*"])),
     params(("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false)),
@@ -183,7 +183,11 @@ pub async fn get_metrics(
     crate::metrics::MEILISEARCH_LAST_FINISHED_BATCHES_PROGRESS_TRACE_MS.reset();
     let (batches, _total) = index_scheduler.get_batches_from_authorized_indexes(
         // Fetch the finished batches...
-        &Query { statuses: Some(vec![Status::Succeeded, Status::Failed]), ..Query::default() },
+        &Query {
+            statuses: Some(vec![Status::Succeeded, Status::Failed]),
+            limit: Some(1),
+            ..Query::default()
+        },
         auth_filters,
     )?;
     // ...and get the last batch only.
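Only the most recent finished batch is needed here, so the query is now capped with `limit: Some(1)` instead of fetching the whole batch history and discarding everything but the last entry. A toy sketch of the struct-update pattern, with a hypothetical `Query` mirroring just the fields used above:

#[derive(Default)]
struct Query {
    statuses: Option<Vec<&'static str>>,
    limit: Option<u32>,
}

fn main() {
    // Cap the query instead of retrieving every finished batch.
    let query = Query {
        statuses: Some(vec!["succeeded", "failed"]),
        limit: Some(1),
        ..Query::default()
    };
    assert_eq!(query.limit, Some(1));
    assert!(query.statuses.is_some());
}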
@@ -789,11 +789,12 @@ impl TryFrom<Value> for ExternalDocumentId {
     }
 }

-#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema, Serialize)]
+#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema, Serialize)]
 #[deserr(rename_all = camelCase)]
 #[serde(rename_all = "camelCase")]
 pub enum MatchingStrategy {
     /// Remove query words from last to first
+    #[default]
     Last,
     /// All query words are mandatory
     All,

@@ -801,12 +802,6 @@ pub enum MatchingStrategy {
     Frequency,
 }

-impl Default for MatchingStrategy {
-    fn default() -> Self {
-        Self::Last
-    }
-}
-
 impl From<MatchingStrategy> for TermsMatchingStrategy {
     fn from(other: MatchingStrategy) -> Self {
         match other {
@@ -187,7 +187,7 @@ macro_rules! compute_forbidden_search {

 #[actix_rt::test]
 async fn search_authorized_simple_token() {
-    let tenant_tokens = vec![
+    let tenant_tokens = [
         hashmap! {
             "searchRules" => json!({"*": {}}),
             "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
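Switching these bindings from `vec![...]` to a plain array drops a heap allocation in tests that only iterate the tokens. A minimal sketch of the difference:

fn main() {
    // A Vec allocates on the heap even when it is only iterated once...
    let as_vec = vec!["a", "b", "c"];
    // ...while a fixed-size array lives on the stack and iterates the same way.
    let as_array = ["a", "b", "c"];

    assert!(as_vec.iter().eq(as_array.iter()));
}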
@@ -239,7 +239,7 @@ async fn search_authorized_simple_token() {

 #[actix_rt::test]
 async fn search_authorized_filter_token() {
-    let tenant_tokens = vec![
+    let tenant_tokens = [
         hashmap! {
             "searchRules" => json!({"*": {"filter": "color = blue"}}),
             "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())

@@ -292,7 +292,7 @@ async fn search_authorized_filter_token() {

 #[actix_rt::test]
 async fn filter_search_authorized_filter_token() {
-    let tenant_tokens = vec![
+    let tenant_tokens = [
         hashmap! {
             "searchRules" => json!({"*": {"filter": "color = blue"}}),
             "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())

@@ -353,7 +353,7 @@ async fn filter_search_authorized_filter_token() {
 /// Tests that those Tenant Token are incompatible with the REFUSED_KEYS defined above.
 #[actix_rt::test]
 async fn error_search_token_forbidden_parent_key() {
-    let tenant_tokens = vec![
+    let tenant_tokens = [
         hashmap! {
             "searchRules" => json!({"*": {}}),
             "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())

@@ -389,7 +389,7 @@ async fn error_search_token_forbidden_parent_key() {

 #[actix_rt::test]
 async fn error_search_forbidden_token() {
-    let tenant_tokens = vec![
+    let tenant_tokens = [
         // bad index
         hashmap! {
             "searchRules" => json!({"products": {}}),

@@ -680,7 +680,7 @@ async fn multi_search_authorized_simple_token() {

 #[actix_rt::test]
 async fn single_search_authorized_filter_token() {
-    let tenant_tokens = vec![
+    let tenant_tokens = [
         hashmap! {
             "searchRules" => json!({"*": {"filter": "color = blue"}}),
             "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())

@@ -733,7 +733,7 @@ async fn single_search_authorized_filter_token() {

 #[actix_rt::test]
 async fn multi_search_authorized_filter_token() {
-    let both_tenant_tokens = vec![
+    let both_tenant_tokens = [
         hashmap! {
             "searchRules" => json!({"sales": {"filter": "color = blue"}, "products": {"filter": "doggos.age <= 5"}}),
             "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())

@@ -842,7 +842,7 @@ async fn filter_single_search_authorized_filter_token() {

 #[actix_rt::test]
 async fn filter_multi_search_authorized_filter_token() {
-    let tenant_tokens = vec![
+    let tenant_tokens = [
         hashmap! {
             "searchRules" => json!({"sales": {"filter": "color = blue"}, "products": {"filter": "doggos.age <= 5"}}),
             "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
|
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
|
||||||
@@ -900,7 +900,7 @@ async fn filter_multi_search_authorized_filter_token() {
|
|||||||
/// Tests that those Tenant Token are incompatible with the REFUSED_KEYS defined above.
|
/// Tests that those Tenant Token are incompatible with the REFUSED_KEYS defined above.
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn error_single_search_token_forbidden_parent_key() {
|
async fn error_single_search_token_forbidden_parent_key() {
|
||||||
let tenant_tokens = vec![
|
let tenant_tokens = [
|
||||||
hashmap! {
|
hashmap! {
|
||||||
"searchRules" => json!({"*": {}}),
|
"searchRules" => json!({"*": {}}),
|
||||||
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
|
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
|
||||||
@@ -941,7 +941,7 @@ async fn error_single_search_token_forbidden_parent_key() {
|
|||||||
/// Tests that those Tenant Token are incompatible with the REFUSED_KEYS defined above.
|
/// Tests that those Tenant Token are incompatible with the REFUSED_KEYS defined above.
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn error_multi_search_token_forbidden_parent_key() {
|
async fn error_multi_search_token_forbidden_parent_key() {
|
||||||
let tenant_tokens = vec![
|
let tenant_tokens = [
|
||||||
hashmap! {
|
hashmap! {
|
||||||
"searchRules" => json!({"*": {}}),
|
"searchRules" => json!({"*": {}}),
|
||||||
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
|
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
|
||||||
|
|||||||
@@ -197,7 +197,7 @@ test_setting_routes!(
|
|||||||
{
|
{
|
||||||
setting: vector_store,
|
setting: vector_store,
|
||||||
update_verb: patch,
|
update_verb: patch,
|
||||||
default_value: null
|
default_value: "experimental"
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ mod chat;
|
|||||||
mod distinct;
|
mod distinct;
|
||||||
mod errors;
|
mod errors;
|
||||||
mod get_settings;
|
mod get_settings;
|
||||||
|
mod parent_seachable_fields;
|
||||||
mod prefix_search_settings;
|
mod prefix_search_settings;
|
||||||
mod proximity_settings;
|
mod proximity_settings;
|
||||||
mod tokenizer_customization;
|
mod tokenizer_customization;
|
||||||
|
|||||||
114
crates/meilisearch/tests/settings/parent_seachable_fields.rs
Normal file
114
crates/meilisearch/tests/settings/parent_seachable_fields.rs
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
use meili_snap::{json_string, snapshot};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
|
||||||
|
use crate::common::Server;
|
||||||
|
use crate::json;
|
||||||
|
|
||||||
|
static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
|
||||||
|
json!([
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"meta": {
|
||||||
|
"title": "Soup of the day",
|
||||||
|
"description": "many the fish",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"meta": {
|
||||||
|
"title": "Soup of day",
|
||||||
|
"description": "many the lazy fish",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"meta": {
|
||||||
|
"title": "the Soup of day",
|
||||||
|
"description": "many the fish",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
])
|
||||||
|
});
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn nested_field_becomes_searchable() {
|
||||||
|
let server = Server::new_shared();
|
||||||
|
let index = server.unique_index();
|
||||||
|
|
||||||
|
let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await;
|
||||||
|
server.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"searchableAttributes": ["meta.title"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
assert_eq!("202", code.as_str(), "{response:?}");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// We expect no documents when searching for
|
||||||
|
// a nested non-searchable field
|
||||||
|
index
|
||||||
|
.search(json!({"q": "many fish"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"[]"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"searchableAttributes": ["meta.title", "meta.description"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
assert_eq!("202", code.as_str(), "{response:?}");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// We expect all the documents when the nested field becomes searchable
|
||||||
|
index
|
||||||
|
.search(json!({"q": "many fish"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"meta": {
|
||||||
|
"title": "Soup of the day",
|
||||||
|
"description": "many the fish"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"meta": {
|
||||||
|
"title": "the Soup of day",
|
||||||
|
"description": "many the fish"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"meta": {
|
||||||
|
"title": "Soup of day",
|
||||||
|
"description": "many the lazy fish"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"searchableAttributes": ["meta.title"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
assert_eq!("202", code.as_str(), "{response:?}");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// We expect no documents when searching for
|
||||||
|
// a nested non-searchable field
|
||||||
|
index
|
||||||
|
.search(json!({"q": "many fish"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"[]"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
@@ -42,8 +42,16 @@ async fn version_too_old() {
|
|||||||
std::fs::create_dir_all(&db_path).unwrap();
|
std::fs::create_dir_all(&db_path).unwrap();
|
||||||
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
|
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
|
||||||
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
|
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
|
||||||
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
|
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err().to_string();
|
||||||
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.28.1");
|
|
||||||
|
let major = meilisearch_types::versioning::VERSION_MAJOR;
|
||||||
|
let minor = meilisearch_types::versioning::VERSION_MINOR;
|
||||||
|
let patch = meilisearch_types::versioning::VERSION_PATCH;
|
||||||
|
|
||||||
|
let current_version = format!("{major}.{minor}.{patch}");
|
||||||
|
let err = err.replace(¤t_version, "[current version]");
|
||||||
|
|
||||||
|
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v[current version]");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
@@ -54,11 +62,21 @@ async fn version_requires_downgrade() {
|
|||||||
std::fs::create_dir_all(&db_path).unwrap();
|
std::fs::create_dir_all(&db_path).unwrap();
|
||||||
let major = meilisearch_types::versioning::VERSION_MAJOR;
|
let major = meilisearch_types::versioning::VERSION_MAJOR;
|
||||||
let minor = meilisearch_types::versioning::VERSION_MINOR;
|
let minor = meilisearch_types::versioning::VERSION_MINOR;
|
||||||
let patch = meilisearch_types::versioning::VERSION_PATCH + 1;
|
let mut patch = meilisearch_types::versioning::VERSION_PATCH;
|
||||||
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
|
|
||||||
|
let current_version = format!("{major}.{minor}.{patch}");
|
||||||
|
patch += 1;
|
||||||
|
let future_version = format!("{major}.{minor}.{patch}");
|
||||||
|
|
||||||
|
std::fs::write(db_path.join("VERSION"), &future_version).unwrap();
|
||||||
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
|
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
|
||||||
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
|
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
|
||||||
snapshot!(err, @"Database version 1.28.2 is higher than the Meilisearch version 1.28.1. Downgrade is not supported");
|
|
||||||
|
let err = err.to_string();
|
||||||
|
let err = err.replace(¤t_version, "[current version]");
|
||||||
|
let err = err.replace(&future_version, "[future version]");
|
||||||
|
|
||||||
|
snapshot!(err, @"Database version [future version] is higher than the Meilisearch version [current version]. Downgrade is not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
|||||||
"progress": null,
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"upgradeFrom": "v1.12.0",
|
"upgradeFrom": "v1.12.0",
|
||||||
"upgradeTo": "v1.28.1"
|
"upgradeTo": "[current version]"
|
||||||
},
|
},
|
||||||
"stats": {
|
"stats": {
|
||||||
"totalNbTasks": 1,
|
"totalNbTasks": 1,
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
|||||||
"progress": null,
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"upgradeFrom": "v1.12.0",
|
"upgradeFrom": "v1.12.0",
|
||||||
"upgradeTo": "v1.28.1"
|
"upgradeTo": "[current version]"
|
||||||
},
|
},
|
||||||
"stats": {
|
"stats": {
|
||||||
"totalNbTasks": 1,
|
"totalNbTasks": 1,
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
|||||||
"progress": null,
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"upgradeFrom": "v1.12.0",
|
"upgradeFrom": "v1.12.0",
|
||||||
"upgradeTo": "v1.28.1"
|
"upgradeTo": "[current version]"
|
||||||
},
|
},
|
||||||
"stats": {
|
"stats": {
|
||||||
"totalNbTasks": 1,
|
"totalNbTasks": 1,
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
|||||||
"canceledBy": null,
|
"canceledBy": null,
|
||||||
"details": {
|
"details": {
|
||||||
"upgradeFrom": "v1.12.0",
|
"upgradeFrom": "v1.12.0",
|
||||||
"upgradeTo": "v1.28.1"
|
"upgradeTo": "[current version]"
|
||||||
},
|
},
|
||||||
"error": null,
|
"error": null,
|
||||||
"duration": "[duration]",
|
"duration": "[duration]",
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
|||||||
"canceledBy": null,
|
"canceledBy": null,
|
||||||
"details": {
|
"details": {
|
||||||
"upgradeFrom": "v1.12.0",
|
"upgradeFrom": "v1.12.0",
|
||||||
"upgradeTo": "v1.28.1"
|
"upgradeTo": "[current version]"
|
||||||
},
|
},
|
||||||
"error": null,
|
"error": null,
|
||||||
"duration": "[duration]",
|
"duration": "[duration]",
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
|||||||
"canceledBy": null,
|
"canceledBy": null,
|
||||||
"details": {
|
"details": {
|
||||||
"upgradeFrom": "v1.12.0",
|
"upgradeFrom": "v1.12.0",
|
||||||
"upgradeTo": "v1.28.1"
|
"upgradeTo": "[current version]"
|
||||||
},
|
},
|
||||||
"error": null,
|
"error": null,
|
||||||
"duration": "[duration]",
|
"duration": "[duration]",
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
|||||||
"progress": null,
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"upgradeFrom": "v1.12.0",
|
"upgradeFrom": "v1.12.0",
|
||||||
"upgradeTo": "v1.28.1"
|
"upgradeTo": "[current version]"
|
||||||
},
|
},
|
||||||
"stats": {
|
"stats": {
|
||||||
"totalNbTasks": 1,
|
"totalNbTasks": 1,
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
|||||||
"canceledBy": null,
|
"canceledBy": null,
|
||||||
"details": {
|
"details": {
|
||||||
"upgradeFrom": "v1.12.0",
|
"upgradeFrom": "v1.12.0",
|
||||||
"upgradeTo": "v1.28.1"
|
"upgradeTo": "[current version]"
|
||||||
},
|
},
|
||||||
"error": null,
|
"error": null,
|
||||||
"duration": "[duration]",
|
"duration": "[duration]",
|
||||||
|
|||||||
@@ -166,55 +166,55 @@ async fn check_the_index_scheduler(server: &Server) {
|
|||||||
// We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration.
|
// We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration.
|
||||||
// The other tasks should NOT change
|
// The other tasks should NOT change
|
||||||
let (tasks, _) = server.tasks_filter("limit=1000").await;
|
let (tasks, _) = server.tasks_filter("limit=1000").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "the_whole_task_queue_once_everything_has_been_processed");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "the_whole_task_queue_once_everything_has_been_processed");
|
||||||
let (batches, _) = server.batches_filter("limit=1000").await;
|
let (batches, _) = server.batches_filter("limit=1000").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "the_whole_batch_queue_once_everything_has_been_processed");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "the_whole_batch_queue_once_everything_has_been_processed");
|
||||||
|
|
||||||
// Tests all the tasks query parameters
|
// Tests all the tasks query parameters
|
||||||
let (tasks, _) = server.tasks_filter("uids=10").await;
|
let (tasks, _) = server.tasks_filter("uids=10").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_uids_equal_10");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_uids_equal_10");
|
||||||
let (tasks, _) = server.tasks_filter("batchUids=10").await;
|
let (tasks, _) = server.tasks_filter("batchUids=10").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_batchUids_equal_10");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_batchUids_equal_10");
|
||||||
let (tasks, _) = server.tasks_filter("statuses=canceled").await;
|
let (tasks, _) = server.tasks_filter("statuses=canceled").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_statuses_equal_canceled");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_statuses_equal_canceled");
|
||||||
// types has already been tested above to retrieve the upgrade database
|
// types has already been tested above to retrieve the upgrade database
|
||||||
let (tasks, _) = server.tasks_filter("canceledBy=19").await;
|
let (tasks, _) = server.tasks_filter("canceledBy=19").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_canceledBy_equal_19");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_canceledBy_equal_19");
|
||||||
let (tasks, _) = server.tasks_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
|
let (tasks, _) = server.tasks_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
|
||||||
let (tasks, _) = server.tasks_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
|
let (tasks, _) = server.tasks_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
|
||||||
let (tasks, _) = server.tasks_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
|
let (tasks, _) = server.tasks_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
|
||||||
let (tasks, _) = server.tasks_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
|
let (tasks, _) = server.tasks_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41");
|
||||||
let (tasks, _) = server.tasks_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
|
let (tasks, _) = server.tasks_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
|
||||||
let (tasks, _) = server.tasks_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
|
let (tasks, _) = server.tasks_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
|
||||||
|
|
||||||
// Tests all the batches query parameters
|
// Tests all the batches query parameters
|
||||||
let (batches, _) = server.batches_filter("uids=10").await;
|
let (batches, _) = server.batches_filter("uids=10").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
|
||||||
let (batches, _) = server.batches_filter("batchUids=10").await;
|
let (batches, _) = server.batches_filter("batchUids=10").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
|
||||||
let (batches, _) = server.batches_filter("statuses=canceled").await;
|
let (batches, _) = server.batches_filter("statuses=canceled").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
|
||||||
// types has already been tested above to retrieve the upgrade database
|
// types has already been tested above to retrieve the upgrade database
|
||||||
let (batches, _) = server.batches_filter("canceledBy=19").await;
|
let (batches, _) = server.batches_filter("canceledBy=19").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
|
||||||
let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
|
let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
|
||||||
let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
|
let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
|
||||||
let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
|
let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
|
||||||
let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
|
let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
|
||||||
let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
|
let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
|
||||||
let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
|
let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
|
||||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
|
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
|
||||||
|
|
||||||
let (stats, _) = server.stats().await;
|
let (stats, _) = server.stats().await;
|
||||||
assert_json_snapshot!(stats, {
|
assert_json_snapshot!(stats, {
|
||||||
|
|||||||
@@ -104,8 +104,8 @@ async fn binary_quantize_before_sending_documents() {
|
|||||||
"manual": {
|
"manual": {
|
||||||
"embeddings": [
|
"embeddings": [
|
||||||
[
|
[
|
||||||
-1.0,
|
0.0,
|
||||||
-1.0,
|
0.0,
|
||||||
1.0
|
1.0
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
@@ -122,7 +122,7 @@ async fn binary_quantize_before_sending_documents() {
|
|||||||
[
|
[
|
||||||
1.0,
|
1.0,
|
||||||
1.0,
|
1.0,
|
||||||
-1.0
|
0.0
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"regenerate": false
|
"regenerate": false
|
||||||
@@ -191,8 +191,8 @@ async fn binary_quantize_after_sending_documents() {
|
|||||||
"manual": {
|
"manual": {
|
||||||
"embeddings": [
|
"embeddings": [
|
||||||
[
|
[
|
||||||
-1.0,
|
0.0,
|
||||||
-1.0,
|
0.0,
|
||||||
1.0
|
1.0
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
@@ -209,7 +209,7 @@ async fn binary_quantize_after_sending_documents() {
|
|||||||
[
|
[
|
||||||
1.0,
|
1.0,
|
||||||
1.0,
|
1.0,
|
||||||
-1.0
|
0.0
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"regenerate": false
|
"regenerate": false
|
||||||
|
|||||||
43
crates/meilisearch/tests/vector/huggingface.rs
Normal file
43
crates/meilisearch/tests/vector/huggingface.rs
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
use meili_snap::snapshot;
|
||||||
|
|
||||||
|
use crate::common::{GetAllDocumentsOptions, Server};
|
||||||
|
use crate::json;
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn hf_bge_m3_force_cls_settings() {
|
||||||
|
let server = Server::new_shared();
|
||||||
|
let index = server.unique_index();
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"default": {
|
||||||
|
"source": "huggingFace",
|
||||||
|
"model": "baai/bge-m3",
|
||||||
|
"revision": "5617a9f61b028005a4858fdac845db406aefb181",
|
||||||
|
"pooling": "forceCls",
|
||||||
|
// minimal template to allow potential document embedding if used later
|
||||||
|
"documentTemplate": "{{doc.title}}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// Try to embed one simple document
|
||||||
|
let (task, code) =
|
||||||
|
index.add_documents(json!([{ "id": 1, "title": "Hello world" }]), None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// Retrieve the document with vectors and assert embeddings were produced
|
||||||
|
let (documents, _code) = index
|
||||||
|
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||||
|
.await;
|
||||||
|
let has_vectors = documents["results"][0]["_vectors"]["default"]["embeddings"]
|
||||||
|
.as_array()
|
||||||
|
.map(|a| !a.is_empty())
|
||||||
|
.unwrap_or(false);
|
||||||
|
snapshot!(has_vectors, @"true");
|
||||||
|
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
mod binary_quantized;
|
mod binary_quantized;
|
||||||
mod fragments;
|
mod fragments;
|
||||||
|
mod huggingface;
|
||||||
#[cfg(feature = "test-ollama")]
|
#[cfg(feature = "test-ollama")]
|
||||||
mod ollama;
|
mod ollama;
|
||||||
mod openai;
|
mod openai;
|
||||||
|
|||||||
@@ -500,13 +500,6 @@ async fn test_both_apis() {
|
|||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(json_string!(response["hits"]), @r###"
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
[
|
[
|
||||||
{
|
|
||||||
"id": 0,
|
|
||||||
"name": "kefir",
|
|
||||||
"gender": "M",
|
|
||||||
"birthyear": 2023,
|
|
||||||
"breed": "Patou"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"name": "Vénus",
|
"name": "Vénus",
|
||||||
@@ -527,6 +520,13 @@ async fn test_both_apis() {
|
|||||||
"gender": "M",
|
"gender": "M",
|
||||||
"birthyear": 1995,
|
"birthyear": 1995,
|
||||||
"breed": "Labrador Retriever"
|
"breed": "Labrador Retriever"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir",
|
||||||
|
"gender": "M",
|
||||||
|
"birthyear": 2023,
|
||||||
|
"breed": "Patou"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
@@ -540,13 +540,6 @@ async fn test_both_apis() {
|
|||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(json_string!(response["hits"]), @r###"
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
[
|
[
|
||||||
{
|
|
||||||
"id": 0,
|
|
||||||
"name": "kefir",
|
|
||||||
"gender": "M",
|
|
||||||
"birthyear": 2023,
|
|
||||||
"breed": "Patou"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"name": "Vénus",
|
"name": "Vénus",
|
||||||
@@ -567,6 +560,13 @@ async fn test_both_apis() {
|
|||||||
"gender": "M",
|
"gender": "M",
|
||||||
"birthyear": 1995,
|
"birthyear": 1995,
|
||||||
"breed": "Labrador Retriever"
|
"breed": "Labrador Retriever"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir",
|
||||||
|
"gender": "M",
|
||||||
|
"birthyear": 2023,
|
||||||
|
"breed": "Patou"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
@@ -581,18 +581,11 @@ async fn test_both_apis() {
|
|||||||
snapshot!(json_string!(response["hits"]), @r###"
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"id": 0,
|
"id": 1,
|
||||||
"name": "kefir",
|
"name": "Intel",
|
||||||
"gender": "M",
|
"gender": "M",
|
||||||
"birthyear": 2023,
|
"birthyear": 2011,
|
||||||
"breed": "Patou"
|
"breed": "Beagle"
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": 3,
|
|
||||||
"name": "Max",
|
|
||||||
"gender": "M",
|
|
||||||
"birthyear": 1995,
|
|
||||||
"breed": "Labrador Retriever"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
@@ -602,11 +595,18 @@ async fn test_both_apis() {
|
|||||||
"breed": "Jack Russel Terrier"
|
"breed": "Jack Russel Terrier"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 3,
|
||||||
"name": "Intel",
|
"name": "Max",
|
||||||
"gender": "M",
|
"gender": "M",
|
||||||
"birthyear": 2011,
|
"birthyear": 1995,
|
||||||
"breed": "Beagle"
|
"breed": "Labrador Retriever"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir",
|
||||||
|
"gender": "M",
|
||||||
|
"birthyear": 2023,
|
||||||
|
"breed": "Patou"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
@@ -621,18 +621,11 @@ async fn test_both_apis() {
|
|||||||
snapshot!(json_string!(response["hits"]), @r###"
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"id": 0,
|
"id": 1,
|
||||||
"name": "kefir",
|
"name": "Intel",
|
||||||
"gender": "M",
|
"gender": "M",
|
||||||
"birthyear": 2023,
|
"birthyear": 2011,
|
||||||
"breed": "Patou"
|
"breed": "Beagle"
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": 3,
|
|
||||||
"name": "Max",
|
|
||||||
"gender": "M",
|
|
||||||
"birthyear": 1995,
|
|
||||||
"breed": "Labrador Retriever"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
@@ -642,11 +635,18 @@ async fn test_both_apis() {
|
|||||||
"breed": "Jack Russel Terrier"
|
"breed": "Jack Russel Terrier"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 3,
|
||||||
"name": "Intel",
|
"name": "Max",
|
||||||
"gender": "M",
|
"gender": "M",
|
||||||
"birthyear": 2011,
|
"birthyear": 1995,
|
||||||
"breed": "Beagle"
|
"breed": "Labrador Retriever"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir",
|
||||||
|
"gender": "M",
|
||||||
|
"birthyear": 2023,
|
||||||
|
"breed": "Patou"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
@@ -661,18 +661,11 @@ async fn test_both_apis() {
|
|||||||
snapshot!(json_string!(response["hits"]), @r###"
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"id": 0,
|
"id": 1,
|
||||||
"name": "kefir",
|
"name": "Intel",
|
||||||
"gender": "M",
|
"gender": "M",
|
||||||
"birthyear": 2023,
|
"birthyear": 2011,
|
||||||
"breed": "Patou"
|
"breed": "Beagle"
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": 3,
|
|
||||||
"name": "Max",
|
|
||||||
"gender": "M",
|
|
||||||
"birthyear": 1995,
|
|
||||||
"breed": "Labrador Retriever"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
@@ -682,11 +675,18 @@ async fn test_both_apis() {
|
|||||||
"breed": "Jack Russel Terrier"
|
"breed": "Jack Russel Terrier"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 3,
|
||||||
"name": "Intel",
|
"name": "Max",
|
||||||
"gender": "M",
|
"gender": "M",
|
||||||
"birthyear": 2011,
|
"birthyear": 1995,
|
||||||
"breed": "Beagle"
|
"breed": "Labrador Retriever"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir",
|
||||||
|
"gender": "M",
|
||||||
|
"birthyear": 2023,
|
||||||
|
"breed": "Patou"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
@@ -701,18 +701,11 @@ async fn test_both_apis() {
|
|||||||
snapshot!(json_string!(response["hits"]), @r###"
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"id": 0,
|
"id": 1,
|
||||||
"name": "kefir",
|
"name": "Intel",
|
||||||
"gender": "M",
|
"gender": "M",
|
||||||
"birthyear": 2023,
|
"birthyear": 2011,
|
||||||
"breed": "Patou"
|
"breed": "Beagle"
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": 3,
|
|
||||||
"name": "Max",
|
|
||||||
"gender": "M",
|
|
||||||
"birthyear": 1995,
|
|
||||||
"breed": "Labrador Retriever"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
@@ -722,11 +715,18 @@ async fn test_both_apis() {
|
|||||||
"breed": "Jack Russel Terrier"
|
"breed": "Jack Russel Terrier"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 3,
|
||||||
"name": "Intel",
|
"name": "Max",
|
||||||
"gender": "M",
|
"gender": "M",
|
||||||
"birthyear": 2011,
|
"birthyear": 1995,
|
||||||
"breed": "Beagle"
|
"breed": "Labrador Retriever"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir",
|
||||||
|
"gender": "M",
|
||||||
|
"birthyear": 2023,
|
||||||
|
"breed": "Patou"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
|
|||||||
"sync",
|
"sync",
|
||||||
] }
|
] }
|
||||||
arroy = "0.6.4-nested-rtxns"
|
arroy = "0.6.4-nested-rtxns"
|
||||||
hannoy = { version = "0.0.9-nested-rtxns-2", features = ["arroy"] }
|
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
tracing = "0.1.41"
|
tracing = "0.1.41"
|
||||||
ureq = { version = "2.12.1", features = ["json"] }
|
ureq = { version = "2.12.1", features = ["json"] }
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ use crate::{
|
|||||||
pub struct Metadata {
|
pub struct Metadata {
|
||||||
/// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable.
|
/// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable.
|
||||||
pub searchable: Option<Weight>,
|
pub searchable: Option<Weight>,
|
||||||
|
/// The field is part of the exact attributes.
|
||||||
|
pub exact: bool,
|
||||||
/// The field is part of the sortable attributes.
|
/// The field is part of the sortable attributes.
|
||||||
pub sortable: bool,
|
pub sortable: bool,
|
||||||
/// The field is defined as the distinct attribute.
|
/// The field is defined as the distinct attribute.
|
||||||
@@ -209,6 +211,7 @@ impl Metadata {
|
|||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct MetadataBuilder {
|
pub struct MetadataBuilder {
|
||||||
searchable_attributes: Option<Vec<String>>,
|
searchable_attributes: Option<Vec<String>>,
|
||||||
|
exact_searchable_attributes: Vec<String>,
|
||||||
filterable_attributes: Vec<FilterableAttributesRule>,
|
filterable_attributes: Vec<FilterableAttributesRule>,
|
||||||
sortable_attributes: HashSet<String>,
|
sortable_attributes: HashSet<String>,
|
||||||
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
||||||
@@ -220,15 +223,18 @@ impl MetadataBuilder {
|
|||||||
pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> {
|
pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> {
|
||||||
let searchable_attributes = index
|
let searchable_attributes = index
|
||||||
.user_defined_searchable_fields(rtxn)?
|
.user_defined_searchable_fields(rtxn)?
|
||||||
.map(|fields| fields.into_iter().map(|s| s.to_string()).collect());
|
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||||
|
let exact_searchable_attributes =
|
||||||
|
index.exact_attributes(rtxn)?.into_iter().map(String::from).collect();
|
||||||
let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
|
let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
|
||||||
let sortable_attributes = index.sortable_fields(rtxn)?;
|
let sortable_attributes = index.sortable_fields(rtxn)?;
|
||||||
let localized_attributes = index.localized_attributes_rules(rtxn)?;
|
let localized_attributes = index.localized_attributes_rules(rtxn)?;
|
||||||
let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
|
let distinct_attribute = index.distinct_field(rtxn)?.map(String::from);
|
||||||
let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
|
let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
|
||||||
|
|
||||||
Ok(Self::new(
|
Ok(Self::new(
|
||||||
searchable_attributes,
|
searchable_attributes,
|
||||||
|
exact_searchable_attributes,
|
||||||
filterable_attributes,
|
filterable_attributes,
|
||||||
sortable_attributes,
|
sortable_attributes,
|
||||||
localized_attributes,
|
localized_attributes,
|
||||||
@@ -242,6 +248,7 @@ impl MetadataBuilder {
|
|||||||
/// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
|
/// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
|
||||||
pub fn new(
|
pub fn new(
|
||||||
searchable_attributes: Option<Vec<String>>,
|
searchable_attributes: Option<Vec<String>>,
|
||||||
|
exact_searchable_attributes: Vec<String>,
|
||||||
filterable_attributes: Vec<FilterableAttributesRule>,
|
filterable_attributes: Vec<FilterableAttributesRule>,
|
||||||
sortable_attributes: HashSet<String>,
|
sortable_attributes: HashSet<String>,
|
||||||
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
||||||
@@ -256,6 +263,7 @@ impl MetadataBuilder {
|
|||||||
|
|
||||||
Self {
|
Self {
|
||||||
searchable_attributes,
|
searchable_attributes,
|
||||||
|
exact_searchable_attributes,
|
||||||
filterable_attributes,
|
filterable_attributes,
|
||||||
sortable_attributes,
|
sortable_attributes,
|
||||||
localized_attributes,
|
localized_attributes,
|
||||||
@@ -269,6 +277,7 @@ impl MetadataBuilder {
|
|||||||
// Vectors fields are not searchable, filterable, distinct or asc_desc
|
// Vectors fields are not searchable, filterable, distinct or asc_desc
|
||||||
return Metadata {
|
return Metadata {
|
||||||
searchable: None,
|
searchable: None,
|
||||||
|
exact: false,
|
||||||
sortable: false,
|
sortable: false,
|
||||||
distinct: false,
|
distinct: false,
|
||||||
asc_desc: false,
|
asc_desc: false,
|
||||||
@@ -296,6 +305,7 @@ impl MetadataBuilder {
|
|||||||
// Geo fields are not searchable, distinct or asc_desc
|
// Geo fields are not searchable, distinct or asc_desc
|
||||||
return Metadata {
|
return Metadata {
|
||||||
searchable: None,
|
searchable: None,
|
||||||
|
exact: false,
|
||||||
sortable,
|
sortable,
|
||||||
distinct: false,
|
distinct: false,
|
||||||
asc_desc: false,
|
asc_desc: false,
|
||||||
@@ -309,6 +319,7 @@ impl MetadataBuilder {
|
|||||||
debug_assert!(!sortable, "geojson fields should not be sortable");
|
debug_assert!(!sortable, "geojson fields should not be sortable");
|
||||||
return Metadata {
|
return Metadata {
|
||||||
searchable: None,
|
searchable: None,
|
||||||
|
exact: false,
|
||||||
sortable,
|
sortable,
|
||||||
distinct: false,
|
distinct: false,
|
||||||
asc_desc: false,
|
asc_desc: false,
|
||||||
@@ -329,6 +340,8 @@ impl MetadataBuilder {
|
|||||||
None => Some(0),
|
None => Some(0),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let exact = self.exact_searchable_attributes.iter().any(|attr| is_faceted_by(field, attr));
|
||||||
|
|
||||||
let distinct =
|
let distinct =
|
||||||
self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
|
self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
|
||||||
let asc_desc = self.asc_desc_attributes.contains(field);
|
let asc_desc = self.asc_desc_attributes.contains(field);
|
||||||
@@ -343,6 +356,7 @@ impl MetadataBuilder {
|
|||||||
|
|
||||||
Metadata {
|
Metadata {
|
||||||
searchable,
|
searchable,
|
||||||
|
exact,
|
||||||
sortable,
|
sortable,
|
||||||
distinct,
|
distinct,
|
||||||
asc_desc,
|
asc_desc,
|
||||||
|
|||||||
@@ -281,6 +281,9 @@ impl Index {
|
|||||||
&mut wtxn,
|
&mut wtxn,
|
||||||
(constants::VERSION_MAJOR, constants::VERSION_MINOR, constants::VERSION_PATCH),
|
(constants::VERSION_MAJOR, constants::VERSION_MINOR, constants::VERSION_PATCH),
|
||||||
)?;
|
)?;
|
||||||
|
// The database before v1.29 defaulted to using arroy, so we
|
||||||
|
// need to set it explicitly because the new default is hannoy.
|
||||||
|
this.put_vector_store(&mut wtxn, VectorStoreBackend::Hannoy)?;
|
||||||
}
|
}
|
||||||
wtxn.commit()?;
|
wtxn.commit()?;
|
||||||
|
|
||||||
|
|||||||
@@ -385,9 +385,10 @@ pub struct SearchResult {
|
|||||||
pub query_vector: Option<Embedding>,
|
pub query_vector: Option<Embedding>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
pub enum TermsMatchingStrategy {
|
pub enum TermsMatchingStrategy {
|
||||||
// remove last word first
|
// remove last word first
|
||||||
|
#[default]
|
||||||
Last,
|
Last,
|
||||||
// all words are mandatory
|
// all words are mandatory
|
||||||
All,
|
All,
|
||||||
@@ -395,12 +396,6 @@ pub enum TermsMatchingStrategy {
|
|||||||
Frequency,
|
Frequency,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for TermsMatchingStrategy {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self::Last
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<MatchingStrategy> for TermsMatchingStrategy {
|
impl From<MatchingStrategy> for TermsMatchingStrategy {
|
||||||
fn from(other: MatchingStrategy) -> Self {
|
fn from(other: MatchingStrategy) -> Self {
|
||||||
match other {
|
match other {
|
||||||
|
|||||||
@@ -124,7 +124,7 @@ impl GrenadParameters {
|
|||||||
/// This should be called inside of a rayon thread pool,
|
/// This should be called inside of a rayon thread pool,
|
||||||
/// otherwise, it will take the global number of threads.
|
/// otherwise, it will take the global number of threads.
|
||||||
pub fn max_memory_by_thread(&self) -> Option<usize> {
|
pub fn max_memory_by_thread(&self) -> Option<usize> {
|
||||||
self.max_memory.map(|max_memory| (max_memory / rayon::current_num_threads()))
|
self.max_memory.map(|max_memory| max_memory / rayon::current_num_threads())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -54,11 +54,12 @@ pub struct DocumentAdditionResult {
     pub number_of_documents: u64,
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
 #[non_exhaustive]
 pub enum IndexDocumentsMethod {
     /// Replace the previous document with the new one,
     /// removing all the already known attributes.
+    #[default]
     ReplaceDocuments,
 
     /// Merge the previous version of the document with the new version,
@@ -66,12 +67,6 @@ pub enum IndexDocumentsMethod {
     UpdateDocuments,
 }
 
-impl Default for IndexDocumentsMethod {
-    fn default() -> Self {
-        Self::ReplaceDocuments
-    }
-}
-
 pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
     wtxn: &'t mut heed::RwTxn<'i>,
     index: &'i Index,
@@ -806,6 +801,10 @@ mod tests {
     use crate::vector::db::IndexEmbeddingConfig;
     use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError};
 
+    fn no_cancel() -> bool {
+        false
+    }
+
     #[test]
     fn simple_document_replacement() {
         let index = TempIndex::new();
@@ -1985,7 +1984,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2038,7 +2037,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2057,7 +2056,7 @@ mod tests {
            primary_key,
            &document_changes,
            RuntimeEmbedders::default(),
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2127,7 +2126,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2146,7 +2145,7 @@ mod tests {
            primary_key,
            &document_changes,
            RuntimeEmbedders::default(),
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2317,7 +2316,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2333,7 +2332,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2381,7 +2380,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2397,7 +2396,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2436,7 +2435,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2452,7 +2451,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2490,7 +2489,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2506,7 +2505,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2546,7 +2545,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2562,7 +2561,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2607,7 +2606,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2623,7 +2622,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2661,7 +2660,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2677,7 +2676,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2715,7 +2714,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2731,7 +2730,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2927,7 +2926,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -2943,7 +2942,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -2988,7 +2987,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -3004,7 +3003,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
@@ -3046,7 +3045,7 @@ mod tests {
            &rtxn,
            None,
            &mut new_fields_ids_map,
-            &|| false,
+            &no_cancel,
            Progress::default(),
            None,
        )
@@ -3062,7 +3061,7 @@ mod tests {
            primary_key,
            &document_changes,
            embedders,
-            &|| false,
+            &no_cancel,
            &Progress::default(),
            &Default::default(),
        )
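The test hunks above swap every inline `&|| false` cancellation closure for the shared `no_cancel` helper added at the top of the test module. A small sketch of why a named `fn` slots in unchanged: a function item satisfies a `Fn() -> bool + Sync` bound just like the closure did (the generic bound below is an assumption mirroring the `MSP: Fn() -> bool + Sync` bounds that appear later in this diff):

```rust
/// Cancellation predicate used by the tests: never ask to stop.
fn no_cancel() -> bool {
    false
}

/// Stand-in for an indexing routine that polls a cancellation predicate.
fn index_documents<MSP: Fn() -> bool + Sync>(must_stop_processing: &MSP) -> usize {
    let mut indexed = 0;
    for _ in 0..100 {
        if must_stop_processing() {
            break;
        }
        indexed += 1;
    }
    indexed
}

fn main() {
    // `&no_cancel` reads the same at the call site as the old `&|| false`,
    // but names the intent and avoids repeating the closure in every test.
    assert_eq!(index_documents(&no_cancel), 100);
}
```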
@@ -8,17 +8,26 @@ use bumpalo::Bump;
 
 use super::match_searchable_field;
 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
+use crate::fields_ids_map::metadata::Metadata;
 use crate::update::new::document::DocumentContext;
 use crate::update::new::extract::cache::BalancedCaches;
 use crate::update::new::extract::perm_json_p::contained_in;
+use crate::update::new::extract::searchable::has_searchable_children;
 use crate::update::new::indexer::document_changes::{
     extract, DocumentChanges, Extractor, IndexingContext,
 };
+use crate::update::new::indexer::settings_changes::{
+    settings_change_extract, DocumentsIndentifiers, SettingsChangeExtractor,
+};
 use crate::update::new::ref_cell_ext::RefCellExt as _;
 use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
-use crate::update::new::DocumentChange;
-use crate::{bucketed_position, DocumentId, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE};
+use crate::update::new::{DocumentChange, DocumentIdentifiers};
+use crate::update::settings::SettingsDelta;
+use crate::{
+    bucketed_position, DocumentId, FieldId, PatternMatch, Result, UserError,
+    MAX_POSITION_PER_ATTRIBUTE,
+};
 
 const MAX_COUNTED_WORDS: usize = 30;
 
@@ -34,6 +43,15 @@ pub struct WordDocidsBalancedCaches<'extractor> {
 
 unsafe impl MostlySend for WordDocidsBalancedCaches<'_> {}
 
+/// Whether to extract or skip fields during word extraction.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum FieldDbExtraction {
+    /// Extract the word and put it in to the fid-based databases.
+    Extract,
+    /// Do not store the word in the fid-based databases.
+    Skip,
+}
+
 impl<'extractor> WordDocidsBalancedCaches<'extractor> {
     pub fn new_in(buckets: usize, max_memory: Option<usize>, alloc: &'extractor Bump) -> Self {
         Self {
@@ -47,12 +65,14 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
         }
     }
 
+    #[allow(clippy::too_many_arguments)]
     fn insert_add_u32(
         &mut self,
         field_id: FieldId,
         position: u16,
         word: &str,
         exact: bool,
+        field_db_extraction: FieldDbExtraction,
         docid: u32,
         bump: &Bump,
     ) -> Result<()> {
@@ -66,11 +86,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
         let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
         let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
 
-        buffer.clear();
-        buffer.extend_from_slice(word_bytes);
-        buffer.push(0);
-        buffer.extend_from_slice(&field_id.to_be_bytes());
-        self.word_fid_docids.insert_add_u32(&buffer, docid)?;
+        if field_db_extraction == FieldDbExtraction::Extract {
+            buffer.clear();
+            buffer.extend_from_slice(word_bytes);
+            buffer.push(0);
+            buffer.extend_from_slice(&field_id.to_be_bytes());
+            self.word_fid_docids.insert_add_u32(&buffer, docid)?;
+        }
 
         let position = bucketed_position(position);
         buffer.clear();
@@ -83,21 +105,26 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
             self.flush_fid_word_count(&mut buffer)?;
         }
 
-        self.fid_word_count
-            .entry(field_id)
-            .and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
-            .or_insert((None, Some(1)));
+        if field_db_extraction == FieldDbExtraction::Extract {
+            self.fid_word_count
+                .entry(field_id)
+                .and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
+                .or_insert((None, Some(1)));
+        }
 
         self.current_docid = Some(docid);
 
         Ok(())
     }
 
+    #[allow(clippy::too_many_arguments)]
     fn insert_del_u32(
         &mut self,
         field_id: FieldId,
         position: u16,
         word: &str,
         exact: bool,
+        field_db_extraction: FieldDbExtraction,
         docid: u32,
         bump: &Bump,
     ) -> Result<()> {
@@ -111,11 +138,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
         let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
         let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
 
-        buffer.clear();
-        buffer.extend_from_slice(word_bytes);
-        buffer.push(0);
-        buffer.extend_from_slice(&field_id.to_be_bytes());
-        self.word_fid_docids.insert_del_u32(&buffer, docid)?;
+        if field_db_extraction == FieldDbExtraction::Extract {
+            buffer.clear();
+            buffer.extend_from_slice(word_bytes);
+            buffer.push(0);
+            buffer.extend_from_slice(&field_id.to_be_bytes());
+            self.word_fid_docids.insert_del_u32(&buffer, docid)?;
+        }
 
         let position = bucketed_position(position);
         buffer.clear();
@@ -128,10 +157,12 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
             self.flush_fid_word_count(&mut buffer)?;
         }
 
-        self.fid_word_count
-            .entry(field_id)
-            .and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
-            .or_insert((Some(1), None));
+        if field_db_extraction == FieldDbExtraction::Extract {
+            self.fid_word_count
+                .entry(field_id)
+                .and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
+                .or_insert((Some(1), None));
+        }
 
         self.current_docid = Some(docid);
 
@@ -325,6 +356,24 @@ impl WordDocidsExtractors {
             exact_attributes.iter().any(|attr| contained_in(fname, attr))
                 || disabled_typos_terms.is_exact(word)
         };
 
+        let mut should_tokenize = |field_name: &str| {
+            let Some((field_id, meta)) = new_fields_ids_map.id_with_metadata_or_insert(field_name)
+            else {
+                return Err(UserError::AttributeLimitReached.into());
+            };
+
+            let pattern_match = if meta.is_searchable() {
+                PatternMatch::Match
+            } else {
+                // TODO: should be a match on the field_name using `match_field_legacy` function,
+                // but for legacy reasons we iterate over all the fields to fill the field_id_map.
+                PatternMatch::Parent
+            };
+
+            Ok((field_id, pattern_match))
+        };
+
         match document_change {
             DocumentChange::Deletion(inner) => {
                 let mut token_fn = |fname: &str, fid, pos, word: &str| {
@@ -333,13 +382,14 @@ impl WordDocidsExtractors {
                         pos,
                         word,
                         is_exact(fname, word),
+                        FieldDbExtraction::Extract,
                        inner.docid(),
                        doc_alloc,
                    )
                };
                document_tokenizer.tokenize_document(
                    inner.current(rtxn, index, context.db_fields_ids_map)?,
-                    new_fields_ids_map,
+                    &mut should_tokenize,
                    &mut token_fn,
                )?;
            }
@@ -361,13 +411,14 @@ impl WordDocidsExtractors {
                        pos,
                        word,
                        is_exact(fname, word),
+                        FieldDbExtraction::Extract,
                        inner.docid(),
                        doc_alloc,
                    )
                };
                document_tokenizer.tokenize_document(
                    inner.current(rtxn, index, context.db_fields_ids_map)?,
-                    new_fields_ids_map,
+                    &mut should_tokenize,
                    &mut token_fn,
                )?;
 
@@ -377,13 +428,14 @@ impl WordDocidsExtractors {
                        pos,
                        word,
                        is_exact(fname, word),
+                        FieldDbExtraction::Extract,
                        inner.docid(),
                        doc_alloc,
                    )
                };
                document_tokenizer.tokenize_document(
                    inner.merged(rtxn, index, context.db_fields_ids_map)?,
-                    new_fields_ids_map,
+                    &mut should_tokenize,
                    &mut token_fn,
                )?;
            }
@@ -394,13 +446,14 @@ impl WordDocidsExtractors {
                        pos,
                        word,
                        is_exact(fname, word),
+                        FieldDbExtraction::Extract,
                        inner.docid(),
                        doc_alloc,
                    )
                };
                document_tokenizer.tokenize_document(
                    inner.inserted(),
-                    new_fields_ids_map,
+                    &mut should_tokenize,
                    &mut token_fn,
                )?;
            }
@@ -411,3 +464,292 @@ impl WordDocidsExtractors {
        cached_sorter.flush_fid_word_count(&mut buffer)
    }
 }
+
+pub struct WordDocidsSettingsExtractorsData<'a, SD> {
+    tokenizer: DocumentTokenizer<'a>,
+    max_memory_by_thread: Option<usize>,
+    buckets: usize,
+    settings_delta: &'a SD,
+}
+
+impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
+    for WordDocidsSettingsExtractorsData<'_, SD>
+{
+    type Data = RefCell<Option<WordDocidsBalancedCaches<'extractor>>>;
+
+    fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
+        Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in(
+            self.buckets,
+            self.max_memory_by_thread,
+            extractor_alloc,
+        ))))
+    }
+
+    fn process<'doc>(
+        &'doc self,
+        documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
+        context: &'doc DocumentContext<Self::Data>,
+    ) -> crate::Result<()> {
+        for document in documents {
+            let document = document?;
+            SettingsChangeWordDocidsExtractors::extract_document_from_settings_change(
+                document,
+                context,
+                &self.tokenizer,
+                self.settings_delta,
+            )?;
+        }
+        Ok(())
+    }
+}
+
+pub struct SettingsChangeWordDocidsExtractors;
+
+impl SettingsChangeWordDocidsExtractors {
+    pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
+        settings_delta: &SD,
+        documents: &'indexer DocumentsIndentifiers<'indexer>,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
+        extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
+        step: IndexingStep,
+    ) -> Result<WordDocidsCaches<'extractor>>
+    where
+        SD: SettingsDelta + Sync,
+        MSP: Fn() -> bool + Sync,
+    {
+        // Warning: this is duplicated code from extract_word_pair_proximity_docids.rs
+        // TODO we need to read the new AND old settings to support changing global parameters
+        let rtxn = indexing_context.index.read_txn()?;
+        let stop_words = indexing_context.index.stop_words(&rtxn)?;
+        let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
+        let allowed_separators: Option<Vec<_>> =
+            allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
+        let dictionary = indexing_context.index.dictionary(&rtxn)?;
+        let dictionary: Option<Vec<_>> =
+            dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
+        let mut builder = tokenizer_builder(
+            stop_words.as_ref(),
+            allowed_separators.as_deref(),
+            dictionary.as_deref(),
+        );
+        let tokenizer = builder.build();
+        let localized_attributes_rules =
+            indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
+        let document_tokenizer = DocumentTokenizer {
+            tokenizer: &tokenizer,
+            localized_attributes_rules: &localized_attributes_rules,
+            max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
+        };
+        let extractor_data = WordDocidsSettingsExtractorsData {
+            tokenizer: document_tokenizer,
+            max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
+            buckets: rayon::current_num_threads(),
+            settings_delta,
+        };
+        let datastore = ThreadLocal::new();
+        {
+            let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors");
+            let _entered = span.enter();
+
+            settings_change_extract(
+                documents,
+                &extractor_data,
+                indexing_context,
+                extractor_allocs,
+                &datastore,
+                step,
+            )?;
+        }
+
+        let mut merger = WordDocidsCaches::new();
+        for cache in datastore.into_iter().flat_map(RefCell::into_inner) {
+            merger.push(cache)?;
+        }
+
+        Ok(merger)
+    }
+
+    /// Extracts document words from a settings change.
+    fn extract_document_from_settings_change<SD: SettingsDelta>(
+        document: DocumentIdentifiers<'_>,
+        context: &DocumentContext<RefCell<Option<WordDocidsBalancedCaches>>>,
+        document_tokenizer: &DocumentTokenizer,
+        settings_delta: &SD,
+    ) -> Result<()> {
+        let mut cached_sorter_ref = context.data.borrow_mut_or_yield();
+        let cached_sorter = cached_sorter_ref.as_mut().unwrap();
+        let doc_alloc = &context.doc_alloc;
+
+        let new_fields_ids_map = settings_delta.new_fields_ids_map();
+        let old_fields_ids_map = context.index.fields_ids_map_with_metadata(&context.rtxn)?;
+        let old_searchable = settings_delta.old_searchable_attributes().as_ref();
+        let new_searchable = settings_delta.new_searchable_attributes().as_ref();
+
+        let current_document = document.current(
+            &context.rtxn,
+            context.index,
+            old_fields_ids_map.as_fields_ids_map(),
+        )?;
+
+        #[derive(Debug, Clone, Copy, PartialEq)]
+        enum ActionToOperate {
+            ReindexAllFields,
+            // TODO improve by listing field prefixes
+            IndexAddedFields,
+            SkipDocument,
+        }
+
+        let mut action = ActionToOperate::SkipDocument;
+        // Here we do a preliminary check to determine the action to take.
+        // This check doesn't trigger the tokenizer as we never return
+        // PatternMatch::Match.
+        document_tokenizer.tokenize_document(
+            current_document,
+            &mut |field_name| {
+                let fid = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
+
+                // If the document must be reindexed, early return NoMatch to stop the scanning process.
+                if action == ActionToOperate::ReindexAllFields {
+                    return Ok((fid, PatternMatch::NoMatch));
+                }
+
+                let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
+                let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
+
+                action = match (old_field_metadata, new_field_metadata) {
+                    // At least one field is added or removed from the exact fields => ReindexAllFields
+                    (Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. })
+                        if old_exact != new_exact =>
+                    {
+                        ActionToOperate::ReindexAllFields
+                    }
+                    // At least one field is removed from the searchable fields => ReindexAllFields
+                    (Metadata { searchable: Some(_), .. }, Metadata { searchable: None, .. }) => {
+                        ActionToOperate::ReindexAllFields
+                    }
+                    // At least one field is added in the searchable fields => IndexAddedFields
+                    (Metadata { searchable: None, .. }, Metadata { searchable: Some(_), .. }) => {
+                        // We can safely overwrite the action, because we early return when action is ReindexAllFields.
+                        ActionToOperate::IndexAddedFields
+                    }
+                    _ => action,
+                };
+
+                Ok((fid, PatternMatch::Parent))
+            },
+            &mut |_, _, _, _| Ok(()),
+        )?;
+
+        // Early return when we don't need to index the document
+        if action == ActionToOperate::SkipDocument {
+            return Ok(());
+        }
+
+        let mut should_tokenize = |field_name: &str| {
+            let field_id = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
+            let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
+            let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
+
+            let pattern_match = match action {
+                ActionToOperate::ReindexAllFields => {
+                    if old_field_metadata.is_searchable() || new_field_metadata.is_searchable() {
+                        PatternMatch::Match
+                    // If any old or new field is searchable then we need to iterate over all fields
+                    // else if any field matches we need to iterate over all fields
+                    } else if has_searchable_children(
+                        field_name,
+                        old_searchable.zip(new_searchable).map(|(old, new)| old.iter().chain(new)),
+                    ) {
+                        PatternMatch::Parent
+                    } else {
+                        PatternMatch::NoMatch
+                    }
+                }
+                ActionToOperate::IndexAddedFields => {
+                    // Was not searchable but now is
+                    if !old_field_metadata.is_searchable() && new_field_metadata.is_searchable() {
+                        PatternMatch::Match
+                    // If the field is now a parent of a searchable field
+                    } else if has_searchable_children(field_name, new_searchable) {
+                        PatternMatch::Parent
+                    } else {
+                        PatternMatch::NoMatch
+                    }
+                }
+                ActionToOperate::SkipDocument => unreachable!(),
+            };
+
+            Ok((field_id, pattern_match))
+        };
+
+        let old_disabled_typos_terms = settings_delta.old_disabled_typos_terms();
+        let new_disabled_typos_terms = settings_delta.new_disabled_typos_terms();
+        let mut token_fn = |_field_name: &str, field_id, pos, word: &str| {
+            let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
+            let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
+
+            match (old_field_metadata, new_field_metadata) {
+                (
+                    Metadata { searchable: Some(_), exact: old_exact, .. },
+                    Metadata { searchable: None, .. },
+                ) => cached_sorter.insert_del_u32(
+                    field_id,
+                    pos,
+                    word,
+                    old_exact || old_disabled_typos_terms.is_exact(word),
+                    // We deleted the field globally
+                    FieldDbExtraction::Skip,
+                    document.docid(),
+                    doc_alloc,
+                ),
+                (
+                    Metadata { searchable: None, .. },
+                    Metadata { searchable: Some(_), exact: new_exact, .. },
+                ) => cached_sorter.insert_add_u32(
+                    field_id,
+                    pos,
+                    word,
+                    new_exact || new_disabled_typos_terms.is_exact(word),
+                    FieldDbExtraction::Extract,
+                    document.docid(),
+                    doc_alloc,
+                ),
+                (Metadata { searchable: None, .. }, Metadata { searchable: None, .. }) => {
+                    unreachable!()
+                }
+                (Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. }) => {
+                    cached_sorter.insert_del_u32(
+                        field_id,
+                        pos,
+                        word,
+                        old_exact || old_disabled_typos_terms.is_exact(word),
+                        // The field has already been extracted
+                        FieldDbExtraction::Skip,
+                        document.docid(),
+                        doc_alloc,
+                    )?;
+                    cached_sorter.insert_add_u32(
+                        field_id,
+                        pos,
+                        word,
+                        new_exact || new_disabled_typos_terms.is_exact(word),
+                        // The field has already been extracted
+                        FieldDbExtraction::Skip,
+                        document.docid(),
+                        doc_alloc,
+                    )
+                }
+            }
+        };
+
+        // TODO we must tokenize twice when we change global parameters like stop words,
+        // the language settings, dictionary, separators, non-separators...
+        document_tokenizer.tokenize_document(
+            current_document,
+            &mut should_tokenize,
+            &mut token_fn,
+        )?;
+
+        Ok(())
+    }
+}
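The `FieldDbExtraction` flag introduced above lets the word-docids caches keep feeding the global word databases while skipping the fid-scoped ones (`word_fid_docids`, `fid_word_count`), which is what the settings-change path needs when a field was deleted globally or already extracted. A simplified sketch of that gating idea, with in-memory maps standing in for the real bump-allocated caches (all types below are illustrative):

```rust
use std::collections::{BTreeMap, BTreeSet};

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FieldDbExtraction {
    Extract,
    Skip,
}

#[derive(Default)]
struct Caches {
    word_docids: BTreeMap<String, BTreeSet<u32>>,
    word_fid_docids: BTreeMap<(String, u16), BTreeSet<u32>>,
}

impl Caches {
    fn insert_add(&mut self, field_id: u16, word: &str, mode: FieldDbExtraction, docid: u32) {
        // The word always reaches the global word database...
        self.word_docids.entry(word.to_owned()).or_default().insert(docid);
        // ...but the fid-scoped entry is only written when extraction is enabled.
        if mode == FieldDbExtraction::Extract {
            self.word_fid_docids.entry((word.to_owned(), field_id)).or_default().insert(docid);
        }
    }
}

fn main() {
    let mut caches = Caches::default();
    caches.insert_add(0, "kefir", FieldDbExtraction::Extract, 1);
    caches.insert_add(0, "kefir", FieldDbExtraction::Skip, 2);
    assert_eq!(caches.word_docids["kefir"].len(), 2);
    assert_eq!(caches.word_fid_docids[&("kefir".to_owned(), 0)].len(), 1);
}
```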
@@ -6,17 +6,24 @@ use bumpalo::Bump;
 
 use super::match_searchable_field;
 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
+use crate::fields_ids_map::metadata::Metadata;
+use crate::proximity::ProximityPrecision::*;
 use crate::proximity::{index_proximity, MAX_DISTANCE};
 use crate::update::new::document::{Document, DocumentContext};
 use crate::update::new::extract::cache::BalancedCaches;
 use crate::update::new::indexer::document_changes::{
     extract, DocumentChanges, Extractor, IndexingContext,
 };
+use crate::update::new::indexer::settings_change_extract;
+use crate::update::new::indexer::settings_changes::{
+    DocumentsIndentifiers, SettingsChangeExtractor,
+};
 use crate::update::new::ref_cell_ext::RefCellExt as _;
 use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, ThreadLocal};
-use crate::update::new::DocumentChange;
-use crate::{FieldId, GlobalFieldsIdsMap, Result, MAX_POSITION_PER_ATTRIBUTE};
+use crate::update::new::{DocumentChange, DocumentIdentifiers};
+use crate::update::settings::SettingsDelta;
+use crate::{FieldId, PatternMatch, Result, UserError, MAX_POSITION_PER_ATTRIBUTE};
 
 pub struct WordPairProximityDocidsExtractorData<'a> {
     tokenizer: DocumentTokenizer<'a>,
@@ -116,7 +123,7 @@ impl WordPairProximityDocidsExtractor {
     // and to store the docids of the documents that have a number of words in a given field
     // equal to or under than MAX_COUNTED_WORDS.
     fn extract_document_change(
-        context: &DocumentContext<RefCell<BalancedCaches>>,
+        context: &DocumentContext<RefCell<BalancedCaches<'_>>>,
         document_tokenizer: &DocumentTokenizer,
         searchable_attributes: Option<&[&str]>,
         document_change: DocumentChange,
@@ -147,8 +154,12 @@ impl WordPairProximityDocidsExtractor {
                process_document_tokens(
                    document,
                    document_tokenizer,
-                    new_fields_ids_map,
                    &mut word_positions,
+                    &mut |field_name| {
+                        new_fields_ids_map
+                            .id_with_metadata_or_insert(field_name)
+                            .ok_or(UserError::AttributeLimitReached.into())
+                    },
                    &mut |(w1, w2), prox| {
                        del_word_pair_proximity.push(((w1, w2), prox));
                    },
@@ -170,8 +181,12 @@ impl WordPairProximityDocidsExtractor {
                process_document_tokens(
                    document,
                    document_tokenizer,
-                    new_fields_ids_map,
                    &mut word_positions,
+                    &mut |field_name| {
+                        new_fields_ids_map
+                            .id_with_metadata_or_insert(field_name)
+                            .ok_or(UserError::AttributeLimitReached.into())
+                    },
                    &mut |(w1, w2), prox| {
                        del_word_pair_proximity.push(((w1, w2), prox));
                    },
@@ -180,8 +195,12 @@ impl WordPairProximityDocidsExtractor {
                process_document_tokens(
                    document,
                    document_tokenizer,
-                    new_fields_ids_map,
                    &mut word_positions,
+                    &mut |field_name| {
+                        new_fields_ids_map
+                            .id_with_metadata_or_insert(field_name)
+                            .ok_or(UserError::AttributeLimitReached.into())
+                    },
                    &mut |(w1, w2), prox| {
                        add_word_pair_proximity.push(((w1, w2), prox));
                    },
@@ -192,8 +211,12 @@ impl WordPairProximityDocidsExtractor {
                process_document_tokens(
                    document,
                    document_tokenizer,
-                    new_fields_ids_map,
                    &mut word_positions,
+                    &mut |field_name| {
+                        new_fields_ids_map
+                            .id_with_metadata_or_insert(field_name)
+                            .ok_or(UserError::AttributeLimitReached.into())
+                    },
                    &mut |(w1, w2), prox| {
                        add_word_pair_proximity.push(((w1, w2), prox));
                    },
@@ -257,8 +280,8 @@ fn drain_word_positions(
 fn process_document_tokens<'doc>(
     document: impl Document<'doc>,
     document_tokenizer: &DocumentTokenizer,
-    fields_ids_map: &mut GlobalFieldsIdsMap,
     word_positions: &mut VecDeque<(Rc<str>, u16)>,
+    field_id_and_metadata: &mut impl FnMut(&str) -> Result<(FieldId, Metadata)>,
     word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8),
 ) -> Result<()> {
     let mut field_id = None;
@@ -279,8 +302,248 @@ fn process_document_tokens<'doc>(
         word_positions.push_back((Rc::from(word), pos));
         Ok(())
     };
-    document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?;
+    let mut should_tokenize = |field_name: &str| {
+        let (field_id, meta) = field_id_and_metadata(field_name)?;
+
+        let pattern_match = if meta.is_searchable() {
+            PatternMatch::Match
+        } else {
+            // TODO: should be a match on the field_name using `match_field_legacy` function,
+            // but for legacy reasons we iterate over all the fields to fill the field_id_map.
+            PatternMatch::Parent
+        };
+
+        Ok((field_id, pattern_match))
+    };
+
+    document_tokenizer.tokenize_document(document, &mut should_tokenize, &mut token_fn)?;
+
     drain_word_positions(word_positions, word_pair_proximity);
     Ok(())
 }
 
+pub struct WordPairProximityDocidsSettingsExtractorsData<'a, SD> {
+    tokenizer: DocumentTokenizer<'a>,
+    max_memory_by_thread: Option<usize>,
+    buckets: usize,
+    settings_delta: &'a SD,
+}
+
+impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
+    for WordPairProximityDocidsSettingsExtractorsData<'_, SD>
+{
+    type Data = RefCell<BalancedCaches<'extractor>>;
+
+    fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
+        Ok(RefCell::new(BalancedCaches::new_in(
+            self.buckets,
+            self.max_memory_by_thread,
+            extractor_alloc,
+        )))
+    }
+
+    fn process<'doc>(
+        &'doc self,
+        documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
+        context: &'doc DocumentContext<Self::Data>,
+    ) -> crate::Result<()> {
+        for document in documents {
+            let document = document?;
+            SettingsChangeWordPairProximityDocidsExtractors::extract_document_from_settings_change(
+                document,
+                context,
+                &self.tokenizer,
+                self.settings_delta,
+            )?;
+        }
+        Ok(())
+    }
+}
+
+pub struct SettingsChangeWordPairProximityDocidsExtractors;
+
+impl SettingsChangeWordPairProximityDocidsExtractors {
+    pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
+        settings_delta: &SD,
+        documents: &'indexer DocumentsIndentifiers<'indexer>,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
+        extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
+        step: IndexingStep,
+    ) -> Result<Vec<BalancedCaches<'extractor>>>
+    where
+        SD: SettingsDelta + Sync,
+        MSP: Fn() -> bool + Sync,
+    {
+        // Warning: this is duplicated code from extract_word_docids.rs
+        let rtxn = indexing_context.index.read_txn()?;
+        let stop_words = indexing_context.index.stop_words(&rtxn)?;
+        let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
+        let allowed_separators: Option<Vec<_>> =
+            allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
+        let dictionary = indexing_context.index.dictionary(&rtxn)?;
+        let dictionary: Option<Vec<_>> =
+            dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
+        let mut builder = tokenizer_builder(
+            stop_words.as_ref(),
+            allowed_separators.as_deref(),
+            dictionary.as_deref(),
+        );
+        let tokenizer = builder.build();
+        let localized_attributes_rules =
+            indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
+        let document_tokenizer = DocumentTokenizer {
+            tokenizer: &tokenizer,
+            localized_attributes_rules: &localized_attributes_rules,
+            max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
+        };
+        let extractor_data = WordPairProximityDocidsSettingsExtractorsData {
+            tokenizer: document_tokenizer,
+            max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
+            buckets: rayon::current_num_threads(),
+            settings_delta,
+        };
+        let datastore = ThreadLocal::new();
+        {
+            let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids_extraction");
+            let _entered = span.enter();
+
+            settings_change_extract(
+                documents,
+                &extractor_data,
+                indexing_context,
+                extractor_allocs,
+                &datastore,
+                step,
+            )?;
+        }
+
+        Ok(datastore.into_iter().map(RefCell::into_inner).collect())
+    }
+
+    /// Extracts document words from a settings change.
+    fn extract_document_from_settings_change<SD: SettingsDelta>(
+        document: DocumentIdentifiers<'_>,
+        context: &DocumentContext<RefCell<BalancedCaches<'_>>>,
+        document_tokenizer: &DocumentTokenizer,
+        settings_delta: &SD,
+    ) -> Result<()> {
+        let mut cached_sorter = context.data.borrow_mut_or_yield();
+        let doc_alloc = &context.doc_alloc;
+
+        let new_fields_ids_map = settings_delta.new_fields_ids_map();
+        let old_fields_ids_map = settings_delta.old_fields_ids_map();
+        let old_proximity_precision = *settings_delta.old_proximity_precision();
+        let new_proximity_precision = *settings_delta.new_proximity_precision();
+
+        let current_document = document.current(
+            &context.rtxn,
+            context.index,
+            old_fields_ids_map.as_fields_ids_map(),
+        )?;
+
+        #[derive(Debug, Clone, Copy, PartialEq)]
+        enum ActionToOperate {
+            ReindexAllFields,
+            SkipDocument,
+        }
+
+        // TODO prefix_fid delete_old_fid_based_databases
+        let mut action = match (old_proximity_precision, new_proximity_precision) {
+            (ByAttribute, ByWord) => ActionToOperate::ReindexAllFields,
+            (_, _) => ActionToOperate::SkipDocument,
+        };
+
+        // Here we do a preliminary check to determine the action to take.
+        // This check doesn't trigger the tokenizer as we never return
+        // PatternMatch::Match.
+        if action != ActionToOperate::ReindexAllFields {
+            document_tokenizer.tokenize_document(
+                current_document,
+                &mut |field_name| {
+                    let fid = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
+
+                    // If the document must be reindexed, early return NoMatch to stop the scanning process.
+                    if action == ActionToOperate::ReindexAllFields {
+                        return Ok((fid, PatternMatch::NoMatch));
+                    }
+
+                    let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
+                    let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
+
+                    action = match (old_field_metadata, new_field_metadata) {
+                        // At least one field is removed or added from the searchable fields
+                        (
+                            Metadata { searchable: Some(_), .. },
+                            Metadata { searchable: None, .. },
+                        )
+                        | (
+                            Metadata { searchable: None, .. },
+                            Metadata { searchable: Some(_), .. },
+                        ) => ActionToOperate::ReindexAllFields,
+                        _ => action,
+                    };
+
+                    Ok((fid, PatternMatch::Parent))
+                },
+                &mut |_, _, _, _| Ok(()),
+            )?;
+        }
+
+        // Early return when we don't need to index the document
+        if action == ActionToOperate::SkipDocument {
+            return Ok(());
+        }
+
+        let mut del_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
+        let mut add_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
+
+        // is a vecdequeue, and will be smol, so can stay on the heap for now
+        let mut word_positions: VecDeque<(Rc<str>, u16)> =
+            VecDeque::with_capacity(MAX_DISTANCE as usize);
+
+        process_document_tokens(
+            current_document,
+            // TODO Tokenize must be based on old settings
+            document_tokenizer,
+            &mut word_positions,
+            &mut |field_name| {
+                Ok(old_fields_ids_map.id_with_metadata(field_name).expect("All fields must exist"))
+            },
+            &mut |(w1, w2), prox| {
+                del_word_pair_proximity.push(((w1, w2), prox));
+            },
+        )?;
+
+        process_document_tokens(
+            current_document,
+            // TODO Tokenize must be based on new settings
+            document_tokenizer,
+            &mut word_positions,
+            &mut |field_name| {
+                Ok(new_fields_ids_map.id_with_metadata(field_name).expect("All fields must exist"))
+            },
+            &mut |(w1, w2), prox| {
+                add_word_pair_proximity.push(((w1, w2), prox));
+            },
+        )?;
+
+        let mut key_buffer = bumpalo::collections::Vec::new_in(doc_alloc);
+
+        del_word_pair_proximity.sort_unstable();
+        del_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
+        for ((w1, w2), prox) in del_word_pair_proximity.iter() {
+            let key = build_key(*prox, w1, w2, &mut key_buffer);
+            cached_sorter.insert_del_u32(key, document.docid())?;
+        }
+
+        add_word_pair_proximity.sort_unstable();
+        add_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
+        for ((w1, w2), prox) in add_word_pair_proximity.iter() {
+            let key = build_key(*prox, w1, w2, &mut key_buffer);
+            cached_sorter.insert_add_u32(key, document.docid())?;
+        }
+
+        Ok(())
+    }
+}
@@ -2,8 +2,12 @@ mod extract_word_docids;
 mod extract_word_pair_proximity_docids;
 mod tokenize_document;
 
-pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors};
-pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
+pub use extract_word_docids::{
+    SettingsChangeWordDocidsExtractors, WordDocidsCaches, WordDocidsExtractors,
+};
+pub use extract_word_pair_proximity_docids::{
+    SettingsChangeWordPairProximityDocidsExtractors, WordPairProximityDocidsExtractor,
+};
 
 use crate::attribute_patterns::{match_field_legacy, PatternMatch};
 
@@ -27,3 +31,17 @@ pub fn match_searchable_field(
 
     selection
 }
+
+/// return `true` if the provided `field_name` is a parent of at least one of the fields contained in `searchable`,
+/// or if `searchable` is `None`.
+fn has_searchable_children<I, A>(field_name: &str, searchable: Option<I>) -> bool
+where
+    I: IntoIterator<Item = A>,
+    A: AsRef<str>,
+{
+    searchable.is_none_or(|fields| {
+        fields
+            .into_iter()
+            .any(|attr| match_field_legacy(attr.as_ref(), field_name) == PatternMatch::Parent)
+    })
+}
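The new `has_searchable_children` helper answers "is this field the parent of at least one searchable attribute, or is everything searchable?". A simplified standalone sketch of that decision, with a naive dotted-path prefix check standing in for the crate's `match_field_legacy`:

```rust
/// Naive stand-in for `match_field_legacy`: `field_name` is a parent of `attr`
/// when `attr` is nested strictly below it (e.g. "doc" vs "doc.title").
fn is_parent_of(field_name: &str, attr: &str) -> bool {
    attr.strip_prefix(field_name).is_some_and(|rest| rest.starts_with('.'))
}

/// `true` when `field_name` is the parent of at least one searchable attribute,
/// or when `searchable` is `None`, meaning "everything is searchable".
fn has_searchable_children<I, A>(field_name: &str, searchable: Option<I>) -> bool
where
    I: IntoIterator<Item = A>,
    A: AsRef<str>,
{
    searchable.map_or(true, |fields| {
        fields.into_iter().any(|attr| is_parent_of(field_name, attr.as_ref()))
    })
}

fn main() {
    let searchable = vec!["doc.title", "doc.overview"];
    assert!(has_searchable_children("doc", Some(&searchable)));
    assert!(!has_searchable_children("genres", Some(&searchable)));
    assert!(has_searchable_children::<Vec<&str>, _>("anything", None));
}
```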
@@ -8,10 +8,7 @@ use crate::update::new::document::Document;
|
|||||||
use crate::update::new::extract::perm_json_p::{
|
use crate::update::new::extract::perm_json_p::{
|
||||||
seek_leaf_values_in_array, seek_leaf_values_in_object, Depth,
|
seek_leaf_values_in_array, seek_leaf_values_in_object, Depth,
|
||||||
};
|
};
|
||||||
use crate::{
|
use crate::{FieldId, InternalError, LocalizedAttributesRule, Result, MAX_WORD_LENGTH};
|
||||||
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
|
|
||||||
MAX_WORD_LENGTH,
|
|
||||||
};
|
|
||||||
|
|
||||||
// todo: should be crate::proximity::MAX_DISTANCE but it has been forgotten
|
// todo: should be crate::proximity::MAX_DISTANCE but it has been forgotten
|
||||||
const MAX_DISTANCE: u32 = 8;
|
const MAX_DISTANCE: u32 = 8;
|
||||||
@@ -26,26 +23,25 @@ impl DocumentTokenizer<'_> {
|
|||||||
pub fn tokenize_document<'doc>(
|
pub fn tokenize_document<'doc>(
|
||||||
&self,
|
&self,
|
||||||
document: impl Document<'doc>,
|
document: impl Document<'doc>,
|
||||||
field_id_map: &mut GlobalFieldsIdsMap,
|
should_tokenize: &mut impl FnMut(&str) -> Result<(FieldId, PatternMatch)>,
|
||||||
         token_fn: &mut impl FnMut(&str, FieldId, u16, &str) -> Result<()>,
     ) -> Result<()> {
         let mut field_position = HashMap::new();
-        let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
-            let Some((field_id, meta)) = field_id_map.id_with_metadata_or_insert(field_name) else {
-                return Err(UserError::AttributeLimitReached.into());
-            };
-
-            if meta.is_searchable() {
-                self.tokenize_field(field_id, field_name, value, token_fn, &mut field_position)?;
-            }
-
-            // todo: should be a match on the field_name using `match_field_legacy` function,
-            // but for legacy reasons we iterate over all the fields to fill the field_id_map.
-            Ok(PatternMatch::Match)
-        };
-
         for entry in document.iter_top_level_fields() {
             let (field_name, value) = entry?;

+            if let (_, PatternMatch::NoMatch) = should_tokenize(field_name)? {
+                continue;
+            }
+
+            let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
+                let (fid, pattern_match) = should_tokenize(field_name)?;
+                if pattern_match == PatternMatch::Match {
+                    self.tokenize_field(fid, field_name, value, token_fn, &mut field_position)?;
+                }
+                Ok(pattern_match)
+            };
+
             // parse json.
             match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
                 Value::Object(object) => seek_leaf_values_in_object(
@@ -192,7 +188,7 @@ mod test {
     use super::*;
     use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
     use crate::update::new::document::{DocumentFromVersions, Versions};
-    use crate::FieldsIdsMap;
+    use crate::{FieldsIdsMap, GlobalFieldsIdsMap, UserError};

     #[test]
     fn test_tokenize_document() {
@@ -231,6 +227,7 @@ mod test {
             Default::default(),
             Default::default(),
             Default::default(),
+            Default::default(),
             None,
             None,
             Default::default(),
@@ -251,15 +248,19 @@ mod test {
         let document = Versions::single(document);
         let document = DocumentFromVersions::new(&document);

+        let mut should_tokenize = |field_name: &str| {
+            let Some(field_id) = global_fields_ids_map.id_or_insert(field_name) else {
+                return Err(UserError::AttributeLimitReached.into());
+            };
+
+            Ok((field_id, PatternMatch::Match))
+        };
+
         document_tokenizer
-            .tokenize_document(
-                document,
-                &mut global_fields_ids_map,
-                &mut |_fname, fid, pos, word| {
-                    words.insert([fid, pos], word.to_string());
-                    Ok(())
-                },
-            )
+            .tokenize_document(document, &mut should_tokenize, &mut |_fname, fid, pos, word| {
+                words.insert([fid, pos], word.to_string());
+                Ok(())
+            })
             .unwrap();

         snapshot!(format!("{:#?}", words), @r###"
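The hunk above swaps the extractor's direct use of the fields-ids map for a `should_tokenize` callback that returns both a field id and a `PatternMatch` decision before any tokenization happens. A self-contained sketch of that control flow, using stand-in types rather than the real milli ones (the real callback is fallible and returns a `Result`):

```rust
use std::collections::HashMap;

type FieldId = u16;

#[derive(PartialEq)]
enum PatternMatch {
    Match,
    NoMatch,
}

fn main() {
    let mut fields: HashMap<String, FieldId> = HashMap::new();
    let mut next_id: FieldId = 0;

    // The callback resolves (or allocates) the field id and decides whether
    // the field should be tokenized at all.
    let mut should_tokenize = |field_name: &str| -> (FieldId, PatternMatch) {
        let fid = match fields.get(field_name) {
            Some(&id) => id,
            None => {
                let id = next_id;
                next_id += 1;
                fields.insert(field_name.to_string(), id);
                id
            }
        };
        // Only "title" counts as searchable in this sketch.
        let decision =
            if field_name == "title" { PatternMatch::Match } else { PatternMatch::NoMatch };
        (fid, decision)
    };

    for (field, value) in [("title", "hello world"), ("internal", "skip me")] {
        let (fid, decision) = should_tokenize(field);
        if decision == PatternMatch::Match {
            println!("tokenizing field {field} (id {fid}): {value}");
        }
    }
}
```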
@@ -1,5 +1,6 @@
 use std::cell::RefCell;
 use std::fmt::Debug;
+use std::sync::RwLock;

 use bumpalo::collections::Vec as BVec;
 use bumpalo::Bump;
@@ -27,7 +28,10 @@ use crate::vector::extractor::{
 use crate::vector::session::{EmbedSession, Input, Metadata, OnEmbed};
 use crate::vector::settings::ReindexAction;
 use crate::vector::{Embedding, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment};
-use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};
+use crate::{
+    DocumentId, FieldDistribution, GlobalFieldsIdsMap, InternalError, Result, ThreadPoolNoAbort,
+    UserError,
+};

 pub struct EmbeddingExtractor<'a, 'b> {
     embedders: &'a RuntimeEmbedders,
@@ -321,6 +325,15 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
         let old_embedders = self.settings_delta.old_embedders();
         let unused_vectors_distribution = UnusedVectorsDistributionBump::new_in(&context.doc_alloc);

+        // We get a reference to the new and old fields ids maps but
+        // note that those are local versions where updates to them
+        // will not be reflected in the database. It's not an issue
+        // because new settings do not generate new fields.
+        let new_fields_ids_map = RwLock::new(self.settings_delta.new_fields_ids_map().clone());
+        let new_fields_ids_map = RefCell::new(GlobalFieldsIdsMap::new(&new_fields_ids_map));
+        let old_fields_ids_map = RwLock::new(self.settings_delta.old_fields_ids_map().clone());
+        let old_fields_ids_map = RefCell::new(GlobalFieldsIdsMap::new(&old_fields_ids_map));
+
         let mut all_chunks = BVec::with_capacity_in(embedders.len(), &context.doc_alloc);
         let embedder_configs = context.index.embedding_configs();
         for (embedder_name, action) in self.settings_delta.embedder_actions().iter() {
@@ -396,6 +409,7 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
                 if !must_regenerate {
                     continue;
                 }
+
                 // we need to regenerate the prompts for the document
                 chunks.settings_change_autogenerated(
                     document.docid(),
@@ -406,7 +420,8 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
                         context.db_fields_ids_map,
                     )?,
                     self.settings_delta,
-                    context.new_fields_ids_map,
+                    &old_fields_ids_map,
+                    &new_fields_ids_map,
                     &unused_vectors_distribution,
                     old_is_user_provided,
                     fragments_changed,
@@ -442,7 +457,8 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
                         context.db_fields_ids_map,
                     )?,
                     self.settings_delta,
-                    context.new_fields_ids_map,
+                    &old_fields_ids_map,
+                    &new_fields_ids_map,
                     &unused_vectors_distribution,
                     old_is_user_provided,
                     true,
@@ -638,7 +654,8 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
         external_docid: &'a str,
         document: D,
         settings_delta: &SD,
-        fields_ids_map: &'a RefCell<crate::GlobalFieldsIdsMap>,
+        old_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
+        new_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
         unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
         old_is_user_provided: bool,
         full_reindex: bool,
@@ -733,10 +750,17 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                     old_embedder.as_ref().map(|old_embedder| &old_embedder.document_template)
                 };

-                let extractor =
-                    DocumentTemplateExtractor::new(document_template, doc_alloc, fields_ids_map);
+                let extractor = DocumentTemplateExtractor::new(
+                    document_template,
+                    doc_alloc,
+                    new_fields_ids_map,
+                );
                 let old_extractor = old_document_template.map(|old_document_template| {
-                    DocumentTemplateExtractor::new(old_document_template, doc_alloc, fields_ids_map)
+                    DocumentTemplateExtractor::new(
+                        old_document_template,
+                        doc_alloc,
+                        old_fields_ids_map,
+                    )
                 });
                 let metadata =
                     Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
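The extractor above builds throwaway, document-local views over clones of the old and new fields-ids maps (an `RwLock`-wrapped clone viewed through `GlobalFieldsIdsMap` inside a `RefCell`), so lookups work but nothing is persisted back. A minimal stand-alone illustration of that idea with plain std stand-ins:

```rust
use std::cell::RefCell;
use std::collections::HashMap;
use std::sync::RwLock;

// Stand-in for a fields-ids map view: lookups go through the shared RwLock,
// nothing is ever written back to the "database" copy.
struct LocalView<'a> {
    shared: &'a RwLock<HashMap<String, u16>>,
}

impl<'a> LocalView<'a> {
    fn id(&self, name: &str) -> Option<u16> {
        self.shared.read().unwrap().get(name).copied()
    }
}

fn main() {
    let db_map: HashMap<String, u16> = HashMap::from([("title".to_string(), 0)]);

    // Clone the persisted map, then build a cheap, document-local view over it.
    let local = RwLock::new(db_map.clone());
    let view = RefCell::new(LocalView { shared: &local });

    assert_eq!(view.borrow().id("title"), Some(0));
    assert_eq!(view.borrow().id("missing"), None);
}
```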
@@ -372,11 +372,10 @@ where
     SD: SettingsDelta + Sync,
 {
     // Create the list of document ids to extract
-    let rtxn = indexing_context.index.read_txn()?;
-    let all_document_ids =
-        indexing_context.index.documents_ids(&rtxn)?.into_iter().collect::<Vec<_>>();
-    let primary_key =
-        primary_key_from_db(indexing_context.index, &rtxn, &indexing_context.db_fields_ids_map)?;
+    let index = indexing_context.index;
+    let rtxn = index.read_txn()?;
+    let all_document_ids = index.documents_ids(&rtxn)?.into_iter().collect::<Vec<_>>();
+    let primary_key = primary_key_from_db(index, &rtxn, &indexing_context.db_fields_ids_map)?;
     let documents = DocumentsIndentifiers::new(&all_document_ids, primary_key);

     let span =
@@ -391,6 +390,133 @@ where
         extractor_allocs,
     )?;

+    {
+        let WordDocidsCaches {
+            word_docids,
+            word_fid_docids,
+            exact_word_docids,
+            word_position_docids,
+            fid_word_count_docids,
+        } = {
+            let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
+            let _entered = span.enter();
+            SettingsChangeWordDocidsExtractors::run_extraction(
+                settings_delta,
+                &documents,
+                indexing_context,
+                extractor_allocs,
+                IndexingStep::ExtractingWords,
+            )?
+        };
+
+        indexing_context.progress.update_progress(IndexingStep::MergingWordCaches);
+
+        {
+            let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(MergingWordCache::WordDocids);
+
+            merge_and_send_docids(
+                word_docids,
+                index.word_docids.remap_types(),
+                index,
+                extractor_sender.docids::<WordDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+
+        {
+            let span =
+                tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(MergingWordCache::WordFieldIdDocids);
+
+            merge_and_send_docids(
+                word_fid_docids,
+                index.word_fid_docids.remap_types(),
+                index,
+                extractor_sender.docids::<WordFidDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+
+        {
+            let span =
+                tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(MergingWordCache::ExactWordDocids);
+
+            merge_and_send_docids(
+                exact_word_docids,
+                index.exact_word_docids.remap_types(),
+                index,
+                extractor_sender.docids::<ExactWordDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+
+        {
+            let span =
+                tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(MergingWordCache::WordPositionDocids);
+
+            merge_and_send_docids(
+                word_position_docids,
+                index.word_position_docids.remap_types(),
+                index,
+                extractor_sender.docids::<WordPositionDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+
+        {
+            let span =
+                tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(MergingWordCache::FieldIdWordCountDocids);
+
+            merge_and_send_docids(
+                fid_word_count_docids,
+                index.field_id_word_count_docids.remap_types(),
+                index,
+                extractor_sender.docids::<FidWordCountDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+    }
+
+    // Run the proximity extraction only if the precision is ByWord.
+    let new_proximity_precision = settings_delta.new_proximity_precision();
+    if *new_proximity_precision == ProximityPrecision::ByWord {
+        let caches = {
+            let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
+            let _entered = span.enter();
+
+            SettingsChangeWordPairProximityDocidsExtractors::run_extraction(
+                settings_delta,
+                &documents,
+                indexing_context,
+                extractor_allocs,
+                IndexingStep::ExtractingWordProximity,
+            )?
+        };
+
+        {
+            let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
+            let _entered = span.enter();
+            indexing_context.progress.update_progress(IndexingStep::MergingWordProximity);
+
+            merge_and_send_docids(
+                caches,
+                index.word_pair_proximity_docids.remap_types(),
+                index,
+                extractor_sender.docids::<WordPairProximityDocids>(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+    }
+
     'vectors: {
         if settings_delta.embedder_actions().is_empty() {
             break 'vectors;
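Each extraction and merge step above is scoped inside a `tracing` span that stays active for as long as the guard returned by `enter()` is alive. A minimal, runnable illustration of that pattern, assuming the `tracing` and `tracing-subscriber` crates as dependencies:

```rust
// Cargo.toml (assumed): tracing = "0.1", tracing-subscriber = "0.3"
fn main() {
    tracing_subscriber::fmt().with_max_level(tracing::Level::TRACE).init();

    // Everything recorded while `_entered` is alive is attributed to this span,
    // mirroring the `let span = ...; let _entered = span.enter();` pairs above.
    let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
    let _entered = span.enter();
    tracing::trace!("merging word docids caches");
}
```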
@@ -1,4 +1,4 @@
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, BTreeSet};
 use std::sync::atomic::AtomicBool;
 use std::sync::{Arc, Once, RwLock};
 use std::thread::{self, Builder};
@@ -8,9 +8,11 @@ use document_changes::{DocumentChanges, IndexingContext};
 pub use document_deletion::DocumentDeletion;
 pub use document_operation::{DocumentOperation, PayloadStats};
 use hashbrown::HashMap;
-use heed::{RoTxn, RwTxn};
+use heed::types::DecodeIgnore;
+use heed::{BytesDecode, Database, RoTxn, RwTxn};
 pub use partial_dump::PartialDump;
 pub use post_processing::recompute_word_fst_from_word_docids_database;
+pub use settings_changes::settings_change_extract;
 pub use update_by_function::UpdateByFunction;
 pub use write::ChannelCongestion;
 use write::{build_vectors, update_index, write_to_db};
@@ -20,12 +22,18 @@ use super::steps::IndexingStep;
 use super::thread_local::ThreadLocal;
 use crate::documents::PrimaryKey;
 use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
+use crate::heed_codec::StrBEU16Codec;
 use crate::progress::{EmbedderStats, Progress};
+use crate::proximity::ProximityPrecision;
+use crate::update::new::steps::SettingsIndexerStep;
+use crate::update::new::FacetFieldIdsDelta;
 use crate::update::settings::SettingsDelta;
 use crate::update::GrenadParameters;
 use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments};
 use crate::vector::{Embedder, RuntimeEmbedders, VectorStore};
-use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort};
+use crate::{
+    Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort,
+};

 #[cfg(not(feature = "enterprise"))]
 pub mod community_edition;
@@ -242,6 +250,20 @@ where
     SD: SettingsDelta + Sync,
 {
     delete_old_embedders_and_fragments(wtxn, index, settings_delta)?;
+    delete_old_fid_based_databases(wtxn, index, settings_delta, must_stop_processing, progress)?;
+
+    // Clear word_pair_proximity if byWord to byAttribute
+    let old_proximity_precision = settings_delta.old_proximity_precision();
+    let new_proximity_precision = settings_delta.new_proximity_precision();
+    if *old_proximity_precision == ProximityPrecision::ByWord
+        && *new_proximity_precision == ProximityPrecision::ByAttribute
+    {
+        index.word_pair_proximity_docids.clear(wtxn)?;
+    }
+
+    // TODO delete useless searchable databases
+    // - Clear fid_prefix_* in the post processing
+    // - clear the prefix + fid_prefix if setting `PrefixSearch` is enabled
+
     let mut bbbuffers = Vec::new();
     let finished_extraction = AtomicBool::new(false);
@@ -300,6 +322,8 @@ where
             .unwrap()
     })?;

+    let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
+
     let new_embedders = settings_delta.new_embedders();
     let embedder_actions = settings_delta.embedder_actions();
     let index_embedder_category_ids = settings_delta.new_embedder_category_id();
@@ -334,6 +358,18 @@ where
     })
     .unwrap()?;

+    pool.install(|| {
+        // WARN When implementing the facets don't forget this
+        let facet_field_ids_delta = FacetFieldIdsDelta::new(0, 0);
+        post_processing::post_process(
+            indexing_context,
+            wtxn,
+            global_fields_ids_map,
+            facet_field_ids_delta,
+        )
+    })
+    .unwrap()?;
+
     indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
     index.cellulite.build(
         wtxn,
@@ -463,6 +499,106 @@ where
     Ok(())
 }

+/// Deletes entries refering the provided
+/// fids from the fid-based databases.
+fn delete_old_fid_based_databases<SD, MSP>(
+    wtxn: &mut RwTxn<'_>,
+    index: &Index,
+    settings_delta: &SD,
+    must_stop_processing: &MSP,
+    progress: &Progress,
+) -> Result<()>
+where
+    SD: SettingsDelta + Sync,
+    MSP: Fn() -> bool + Sync,
+{
+    let fids_to_delete: Option<BTreeSet<_>> = {
+        let rtxn = index.read_txn()?;
+        let fields_ids_map = index.fields_ids_map(&rtxn)?;
+        let old_searchable_attributes = settings_delta.old_searchable_attributes().as_ref();
+        let new_searchable_attributes = settings_delta.new_searchable_attributes().as_ref();
+        old_searchable_attributes.zip(new_searchable_attributes).map(|(old, new)| {
+            old.iter()
+                // Ignore the field if it is not searchable anymore
+                // or if it was never referenced in any document
+                .filter_map(|name| if new.contains(name) { None } else { fields_ids_map.id(name) })
+                .collect()
+        })
+    };
+
+    let Some(fids_to_delete) = fids_to_delete else {
+        return Ok(());
+    };
+
+    progress.update_progress(SettingsIndexerStep::DeletingOldWordFidDocids);
+    delete_old_word_fid_docids(wtxn, index.word_fid_docids, must_stop_processing, &fids_to_delete)?;
+
+    progress.update_progress(SettingsIndexerStep::DeletingOldFidWordCountDocids);
+    delete_old_fid_word_count_docids(wtxn, index, must_stop_processing, &fids_to_delete)?;
+
+    progress.update_progress(SettingsIndexerStep::DeletingOldWordPrefixFidDocids);
+    delete_old_word_fid_docids(
+        wtxn,
+        index.word_prefix_fid_docids,
+        must_stop_processing,
+        &fids_to_delete,
+    )?;
+
+    Ok(())
+}
+
+fn delete_old_word_fid_docids<'txn, MSP, DC>(
+    wtxn: &mut RwTxn<'txn>,
+    database: Database<StrBEU16Codec, DC>,
+    must_stop_processing: &MSP,
+    fids_to_delete: &BTreeSet<u16>,
+) -> Result<(), Error>
+where
+    MSP: Fn() -> bool + Sync,
+    DC: BytesDecode<'txn>,
+{
+    let mut iter = database.iter_mut(wtxn)?.remap_data_type::<DecodeIgnore>();
+    while let Some(((_word, fid), ())) = iter.next().transpose()? {
+        // TODO should I call it that often?
+        if must_stop_processing() {
+            return Err(Error::InternalError(InternalError::AbortedIndexation));
+        }
+
+        if fids_to_delete.contains(&fid) {
+            // safety: We don't keep any references to the data.
+            unsafe { iter.del_current()? };
+        }
+    }
+
+    Ok(())
+}
+
+fn delete_old_fid_word_count_docids<MSP>(
+    wtxn: &mut RwTxn<'_>,
+    index: &Index,
+    must_stop_processing: &MSP,
+    fids_to_delete: &BTreeSet<u16>,
+) -> Result<(), Error>
+where
+    MSP: Fn() -> bool + Sync,
+{
+    let db = index.field_id_word_count_docids.remap_data_type::<DecodeIgnore>();
+    for &fid_to_delete in fids_to_delete {
+        if must_stop_processing() {
+            return Err(Error::InternalError(InternalError::AbortedIndexation));
+        }
+
+        let mut iter = db.prefix_iter_mut(wtxn, &(fid_to_delete, 0))?;
+        while let Some(((fid, _word_count), ())) = iter.next().transpose()? {
+            debug_assert_eq!(fid, fid_to_delete);
+            // safety: We don't keep any references to the data.
+            unsafe { iter.del_current()? };
+        }
+    }
+
+    Ok(())
+}
+
 fn indexer_memory_settings(
     current_num_threads: usize,
     grenad_parameters: GrenadParameters,
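The new `delete_old_fid_based_databases` derives the ids to purge by zipping the optional old and new searchable lists and keeping only the ids of fields that dropped out. The same computation with plain std types (the field names and ids below are made up for the example):

```rust
use std::collections::{BTreeSet, HashMap};

fn main() {
    let fields_ids_map: HashMap<&str, u16> =
        HashMap::from([("title", 0), ("overview", 1), ("genres", 2)]);
    let old_searchable: Option<Vec<&str>> = Some(vec!["title", "overview", "genres"]);
    let new_searchable: Option<Vec<&str>> = Some(vec!["title"]);

    // Zip the two optional lists; if either is `None` there is nothing to delete.
    let fids_to_delete: Option<BTreeSet<u16>> =
        old_searchable.as_ref().zip(new_searchable.as_ref()).map(|(old, new)| {
            old.iter()
                .filter_map(|name| {
                    // Keep the id only when the field is no longer searchable.
                    if new.contains(name) { None } else { fields_ids_map.get(name).copied() }
                })
                .collect()
        });

    assert_eq!(fids_to_delete, Some(BTreeSet::from([1, 2])));
}
```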
@@ -28,6 +28,9 @@ make_enum_progress! {
         ChangingVectorStore,
         UsingStableIndexer,
         UsingExperimentalIndexer,
+        DeletingOldWordFidDocids,
+        DeletingOldFidWordCountDocids,
+        DeletingOldWordPrefixFidDocids,
     }
 }

@@ -48,10 +48,11 @@ use crate::{
     ChannelCongestion, FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result,
 };

-#[derive(Debug, Clone, PartialEq, Eq, Copy)]
+#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)]
 pub enum Setting<T> {
     Set(T),
     Reset,
+    #[default]
     NotSet,
 }

@@ -71,12 +72,6 @@ where
     }
 }

-impl<T> Default for Setting<T> {
-    fn default() -> Self {
-        Self::NotSet
-    }
-}
-
 impl<T> Setting<T> {
     pub fn set(self) -> Option<T> {
         match self {
@@ -1589,33 +1584,33 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {

         // only use the new indexer when only the embedder possibly changed
         if let Self {
-            searchable_fields: Setting::NotSet,
+            searchable_fields: _,
             displayed_fields: Setting::NotSet,
             filterable_fields: Setting::NotSet,
             sortable_fields: Setting::NotSet,
             criteria: Setting::NotSet,
-            stop_words: Setting::NotSet,
-            non_separator_tokens: Setting::NotSet,
-            separator_tokens: Setting::NotSet,
-            dictionary: Setting::NotSet,
+            stop_words: Setting::NotSet, // TODO (require force reindexing of searchables)
+            non_separator_tokens: Setting::NotSet, // TODO (require force reindexing of searchables)
+            separator_tokens: Setting::NotSet, // TODO (require force reindexing of searchables)
+            dictionary: Setting::NotSet, // TODO (require force reindexing of searchables)
             distinct_field: Setting::NotSet,
             synonyms: Setting::NotSet,
             primary_key: Setting::NotSet,
             authorize_typos: Setting::NotSet,
             min_word_len_two_typos: Setting::NotSet,
             min_word_len_one_typo: Setting::NotSet,
-            exact_words: Setting::NotSet,
-            exact_attributes: Setting::NotSet,
+            exact_words: Setting::NotSet, // TODO (require force reindexing of searchables)
+            exact_attributes: _,
             max_values_per_facet: Setting::NotSet,
             sort_facet_values_by: Setting::NotSet,
             pagination_max_total_hits: Setting::NotSet,
-            proximity_precision: Setting::NotSet,
+            proximity_precision: _,
             embedder_settings: _,
             search_cutoff: Setting::NotSet,
-            localized_attributes_rules: Setting::NotSet,
-            prefix_search: Setting::NotSet,
+            localized_attributes_rules: Setting::NotSet, // TODO to start with
+            prefix_search: Setting::NotSet, // TODO continue with this
             facet_search: Setting::NotSet,
-            disable_on_numbers: Setting::NotSet,
+            disable_on_numbers: Setting::NotSet, // TODO (require force reindexing of searchables)
             chat: Setting::NotSet,
             vector_store: Setting::NotSet,
             wtxn: _,
@@ -1632,10 +1627,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         // Update index settings
         let embedding_config_updates = self.update_embedding_configs()?;
         self.update_user_defined_searchable_attributes()?;
+        self.update_exact_attributes()?;
+        self.update_proximity_precision()?;

-        let mut new_inner_settings =
-            InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
-        new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
+        // Note that we don't need to update the searchables here,
+        // as it will be done after the settings update.
+        let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;

         let primary_key_id = self
             .index
@@ -2062,9 +2059,12 @@ impl InnerIndexSettings {
         let sortable_fields = index.sortable_fields(rtxn)?;
         let asc_desc_fields = index.asc_desc_fields(rtxn)?;
         let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
-        let user_defined_searchable_attributes = index
-            .user_defined_searchable_fields(rtxn)?
-            .map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
+        let user_defined_searchable_attributes = match index.user_defined_searchable_fields(rtxn)? {
+            Some(fields) if fields.contains(&"*") => None,
+            Some(fields) => Some(fields.into_iter().map(|f| f.to_string()).collect()),
+            None => None,
+        };

         let builder = MetadataBuilder::from_index(index, rtxn)?;
         let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
         let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
@@ -2578,8 +2578,20 @@ fn deserialize_sub_embedder(
 /// Implement this trait for the settings delta type.
 /// This is used in the new settings update flow and will allow to easily replace the old settings delta type: `InnerIndexSettingsDiff`.
 pub trait SettingsDelta {
-    fn new_embedders(&self) -> &RuntimeEmbedders;
+    fn old_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
+    fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
+
+    fn old_searchable_attributes(&self) -> &Option<Vec<String>>;
+    fn new_searchable_attributes(&self) -> &Option<Vec<String>>;
+
+    fn old_disabled_typos_terms(&self) -> &DisabledTyposTerms;
+    fn new_disabled_typos_terms(&self) -> &DisabledTyposTerms;
+
+    fn old_proximity_precision(&self) -> &ProximityPrecision;
+    fn new_proximity_precision(&self) -> &ProximityPrecision;
+
     fn old_embedders(&self) -> &RuntimeEmbedders;
+    fn new_embedders(&self) -> &RuntimeEmbedders;
     fn new_embedder_category_id(&self) -> &HashMap<String, u8>;
     fn embedder_actions(&self) -> &BTreeMap<String, EmbedderAction>;
     fn try_for_each_fragment_diff<F, E>(
@@ -2589,7 +2601,6 @@ pub trait SettingsDelta {
     ) -> std::result::Result<(), E>
     where
         F: FnMut(FragmentDiff) -> std::result::Result<(), E>;
-    fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
 }

 pub struct FragmentDiff<'a> {
@@ -2598,26 +2609,47 @@ pub struct FragmentDiff<'a> {
 }

 impl SettingsDelta for InnerIndexSettingsDiff {
-    fn new_embedders(&self) -> &RuntimeEmbedders {
-        &self.new.runtime_embedders
+    fn old_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
+        &self.old.fields_ids_map
+    }
+    fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
+        &self.new.fields_ids_map
+    }
+
+    fn old_searchable_attributes(&self) -> &Option<Vec<String>> {
+        &self.old.user_defined_searchable_attributes
+    }
+    fn new_searchable_attributes(&self) -> &Option<Vec<String>> {
+        &self.new.user_defined_searchable_attributes
+    }
+
+    fn old_disabled_typos_terms(&self) -> &DisabledTyposTerms {
+        &self.old.disabled_typos_terms
+    }
+    fn new_disabled_typos_terms(&self) -> &DisabledTyposTerms {
+        &self.new.disabled_typos_terms
+    }
+
+    fn old_proximity_precision(&self) -> &ProximityPrecision {
+        &self.old.proximity_precision
+    }
+    fn new_proximity_precision(&self) -> &ProximityPrecision {
+        &self.new.proximity_precision
     }

     fn old_embedders(&self) -> &RuntimeEmbedders {
         &self.old.runtime_embedders
     }
+    fn new_embedders(&self) -> &RuntimeEmbedders {
+        &self.new.runtime_embedders
+    }

     fn new_embedder_category_id(&self) -> &HashMap<String, u8> {
         &self.new.embedder_category_id
     }

     fn embedder_actions(&self) -> &BTreeMap<String, EmbedderAction> {
         &self.embedding_config_updates
     }

-    fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
-        &self.new.fields_ids_map
-    }
-
     fn try_for_each_fragment_diff<F, E>(
         &self,
         embedder_name: &str,
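The manual `impl Default` removed above is exactly what `#[derive(Default)]` plus the `#[default]` variant attribute (stable since Rust 1.62) generates. A self-contained equivalent with a stand-in enum:

```rust
#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)]
pub enum Setting<T> {
    Set(T),
    Reset,
    // The derive picks this variant when `Setting::default()` is called.
    #[default]
    NotSet,
}

fn main() {
    let s: Setting<u32> = Setting::default();
    assert_eq!(s, Setting::NotSet);
}
```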
@@ -14,28 +14,21 @@ fn set_and_reset_searchable_fields() {
     let index = TempIndex::new();

     // First we send 3 documents with ids from 1 to 3.
-    let mut wtxn = index.write_txn().unwrap();
-
     index
-        .add_documents_using_wtxn(
-            &mut wtxn,
-            documents!([
-                { "id": 1, "name": "kevin", "age": 23 },
-                { "id": 2, "name": "kevina", "age": 21},
-                { "id": 3, "name": "benoit", "age": 34 }
-            ]),
-        )
+        .add_documents(documents!([
+            { "id": 1, "name": "kevin", "age": 23 },
+            { "id": 2, "name": "kevina", "age": 21},
+            { "id": 3, "name": "benoit", "age": 34 }
+        ]))
         .unwrap();

     // We change the searchable fields to be the "name" field only.
     index
-        .update_settings_using_wtxn(&mut wtxn, |settings| {
+        .update_settings(|settings| {
             settings.set_searchable_fields(vec!["name".into()]);
         })
         .unwrap();

-    wtxn.commit().unwrap();
-
     db_snap!(index, fields_ids_map, @r###"
     0 id |
     1 name |
@@ -5,103 +5,36 @@ mod v1_15;
 mod v1_16;

 use heed::RwTxn;
-use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
-use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
-use v1_14::Latest_V1_13_To_Latest_V1_14;
-use v1_15::Latest_V1_14_To_Latest_V1_15;
-use v1_16::Latest_V1_15_To_V1_16_0;
+use v1_12::{FixFieldDistribution, RecomputeStats};
+use v1_13::AddNewStats;
+use v1_14::UpgradeArroyVersion;
+use v1_15::RecomputeWordFst;
+use v1_16::SwitchToMultimodal;

 use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
 use crate::progress::{Progress, VariableNameStep};
 use crate::{Index, InternalError, Result};

 trait UpgradeIndex {
+    /// Returns `true` if `upgrade` should be called when the index started with version `initial_version`.
+    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
+
     /// Returns `true` if the index scheduler must regenerate its cached stats.
-    fn upgrade(
-        &self,
-        wtxn: &mut RwTxn,
-        index: &Index,
-        original: (u32, u32, u32),
-        progress: Progress,
-    ) -> Result<bool>;
-    fn target_version(&self) -> (u32, u32, u32);
+    fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool>;
+
+    /// Description of the upgrade for progress display purposes.
+    fn description(&self) -> &'static str;
 }

 const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
-    &V1_12_To_V1_12_3 {},
-    &V1_12_3_To_V1_13_0 {},
-    &V1_13_0_To_V1_13_1 {},
-    &V1_13_1_To_Latest_V1_13 {},
-    &Latest_V1_13_To_Latest_V1_14 {},
-    &Latest_V1_14_To_Latest_V1_15 {},
-    &Latest_V1_15_To_V1_16_0 {},
-    &ToTargetNoOp { target: (1, 18, 0) },
-    &ToTargetNoOp { target: (1, 19, 0) },
-    &ToTargetNoOp { target: (1, 20, 0) },
-    &ToTargetNoOp { target: (1, 21, 0) },
-    &ToTargetNoOp { target: (1, 22, 0) },
-    &ToTargetNoOp { target: (1, 23, 0) },
-    &ToTargetNoOp { target: (1, 24, 0) },
-    &ToTargetNoOp { target: (1, 25, 0) },
-    &ToTargetNoOp { target: (1, 26, 0) },
-    &ToTargetNoOp { target: (1, 27, 0) },
-    &ToTargetNoOp { target: (1, 28, 0) },
-    // This is the last upgrade function, it will be called when the index is up to date.
-    // any other upgrade function should be added before this one.
-    &ToCurrentNoOp {},
+    &FixFieldDistribution {},
+    &RecomputeStats {},
+    &AddNewStats {},
+    &UpgradeArroyVersion {},
+    &RecomputeWordFst {},
+    &SwitchToMultimodal {},
 ];

-/// Causes a compile-time error if the argument is not in range of `0..UPGRADE_FUNCTIONS.len()`
-macro_rules! function_index {
-    ($start:expr) => {{
-        const _CHECK_INDEX: () = {
-            if $start >= $crate::update::upgrade::UPGRADE_FUNCTIONS.len() {
-                panic!("upgrade functions out of range")
-            }
-        };
-
-        $start
-    }};
-}
-
-const fn start(from: (u32, u32, u32)) -> Option<usize> {
-    let start = match from {
-        (1, 12, 0..=2) => function_index!(0),
-        (1, 12, 3..) => function_index!(1),
-        (1, 13, 0) => function_index!(2),
-        (1, 13, _) => function_index!(4),
-        (1, 14, _) => function_index!(5),
-        // We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
-        (1, 15, _) => function_index!(6),
-        (1, 16, _) | (1, 17, _) => function_index!(7),
-        (1, 18, _) => function_index!(8),
-        (1, 19, _) => function_index!(9),
-        (1, 20, _) => function_index!(10),
-        (1, 21, _) => function_index!(11),
-        (1, 22, _) => function_index!(12),
-        (1, 23, _) => function_index!(13),
-        (1, 24, _) => function_index!(14),
-        (1, 25, _) => function_index!(15),
-        (1, 26, _) => function_index!(16),
-        (1, 27, _) => function_index!(17),
-        (1, 28, _) => function_index!(18),
-        // We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here to force manually
-        // considering dumpless upgrade.
-        (_major, _minor, _patch) => return None,
-    };
-
-    Some(start)
-}
-
-/// Causes a compile-time error if the latest package cannot be upgraded.
-///
-/// This serves as a reminder to consider the proper dumpless upgrade implementation when changing the package version.
-const _CHECK_PACKAGE_CAN_UPGRADE: () = {
-    if start((VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)).is_none() {
-        panic!("cannot upgrade from latest package version")
-    }
-};
-
 /// Return true if the cached stats of the index must be regenerated
 pub fn upgrade<MSP>(
     wtxn: &mut RwTxn,
@@ -113,79 +46,34 @@ pub fn upgrade<MSP>(
 where
     MSP: Fn() -> bool + Sync,
 {
-    let from = index.get_version(wtxn)?.unwrap_or(db_version);
-    let start =
-        start(from).ok_or_else(|| InternalError::CannotUpgradeToVersion(from.0, from.1, from.2))?;
+    let upgrade_functions = UPGRADE_FUNCTIONS;
+
+    let initial_version = index.get_version(wtxn)?.unwrap_or(db_version);

     enum UpgradeVersion {}
-    let upgrade_path = &UPGRADE_FUNCTIONS[start..];

-    let mut current_version = from;
     let mut regenerate_stats = false;
-    for (i, upgrade) in upgrade_path.iter().enumerate() {
+    for (i, upgrade) in upgrade_functions.iter().enumerate() {
         if (must_stop_processing)() {
             return Err(crate::Error::InternalError(InternalError::AbortedIndexation));
         }
-        let target = upgrade.target_version();
-        progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
-            format!(
-                "Upgrading from v{}.{}.{} to v{}.{}.{}",
-                current_version.0,
-                current_version.1,
-                current_version.2,
-                target.0,
-                target.1,
-                target.2
-            ),
-            i as u32,
-            upgrade_path.len() as u32,
-        ));
-        regenerate_stats |= upgrade.upgrade(wtxn, index, from, progress.clone())?;
-        index.put_version(wtxn, target)?;
-        current_version = target;
+        if upgrade.must_upgrade(initial_version) {
+            regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
+            progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
+                upgrade.description(),
+                i as u32,
+                upgrade_functions.len() as u32,
+            ));
+        } else {
+            progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
+                "Skipping migration that must not be applied",
+                i as u32,
+                upgrade_functions.len() as u32,
+            ));
+        }
     }

+    index.put_version(wtxn, (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH))?;
+
     Ok(regenerate_stats)
 }

-#[allow(non_camel_case_types)]
-struct ToCurrentNoOp {}
-
-impl UpgradeIndex for ToCurrentNoOp {
-    fn upgrade(
-        &self,
-        _wtxn: &mut RwTxn,
-        _index: &Index,
-        _original: (u32, u32, u32),
-        _progress: Progress,
-    ) -> Result<bool> {
-        Ok(false)
-    }
-
-    fn target_version(&self) -> (u32, u32, u32) {
-        (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
-    }
-}
-
-/// Perform no operation during the upgrade except changing to the specified target version.
-#[allow(non_camel_case_types)]
-struct ToTargetNoOp {
-    pub target: (u32, u32, u32),
-}
-
-impl UpgradeIndex for ToTargetNoOp {
-    fn upgrade(
-        &self,
-        _wtxn: &mut RwTxn,
-        _index: &Index,
-        _original: (u32, u32, u32),
-        _progress: Progress,
-    ) -> Result<bool> {
-        Ok(false)
-    }
-
-    fn target_version(&self) -> (u32, u32, u32) {
-        self.target
-    }
-}
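The migration list is now driven by `must_upgrade`/`upgrade`/`description` instead of per-step target versions and no-op placeholders. A self-contained toy of the new scheme (toy types and a simplified `upgrade` signature, not the real milli ones):

```rust
type Version = (u32, u32, u32);

trait UpgradeIndex {
    /// Should this migration run for an index that started at `initial_version`?
    fn must_upgrade(&self, initial_version: Version) -> bool;
    /// Toy signature: returns whether cached stats must be regenerated.
    fn upgrade(&self) -> bool;
    fn description(&self) -> &'static str;
}

struct FixSomething;

impl UpgradeIndex for FixSomething {
    fn must_upgrade(&self, initial_version: Version) -> bool {
        initial_version < (1, 12, 3)
    }
    fn upgrade(&self) -> bool {
        true
    }
    fn description(&self) -> &'static str {
        "Fixing something that was wrong before v1.12.3"
    }
}

fn main() {
    let migrations: &[&dyn UpgradeIndex] = &[&FixSomething];
    let initial_version: Version = (1, 12, 0);
    let mut regenerate_stats = false;
    for migration in migrations {
        if migration.must_upgrade(initial_version) {
            println!("applying: {}", migration.description());
            regenerate_stats |= migration.upgrade();
        } else {
            println!("skipping: {}", migration.description());
        }
    }
    // In the real code the index version is stamped once, after the loop,
    // to the current package version.
    assert!(regenerate_stats);
}
```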
@@ -4,17 +4,10 @@ use super::UpgradeIndex;
 use crate::progress::Progress;
 use crate::{make_enum_progress, Index, Result};

-#[allow(non_camel_case_types)]
-pub(super) struct V1_12_To_V1_12_3 {}
+pub(super) struct FixFieldDistribution {}

-impl UpgradeIndex for V1_12_To_V1_12_3 {
-    fn upgrade(
-        &self,
-        wtxn: &mut RwTxn,
-        index: &Index,
-        _original: (u32, u32, u32),
-        progress: Progress,
-    ) -> Result<bool> {
+impl UpgradeIndex for FixFieldDistribution {
+    fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
         make_enum_progress! {
             enum FieldDistribution {
                 RebuildingFieldDistribution,
@@ -25,27 +18,28 @@ impl UpgradeIndex for V1_12_To_V1_12_3 {
         Ok(true)
     }

-    fn target_version(&self) -> (u32, u32, u32) {
-        (1, 12, 3)
+    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
+        initial_version < (1, 12, 3)
+    }
+
+    fn description(&self) -> &'static str {
+        "Recomputing field distribution which was wrong before v1.12.3"
     }
 }

-#[allow(non_camel_case_types)]
-pub(super) struct V1_12_3_To_V1_13_0 {}
+pub(super) struct RecomputeStats {}

-impl UpgradeIndex for V1_12_3_To_V1_13_0 {
-    fn upgrade(
-        &self,
-        _wtxn: &mut RwTxn,
-        _index: &Index,
-        _original: (u32, u32, u32),
-        _progress: Progress,
-    ) -> Result<bool> {
+impl UpgradeIndex for RecomputeStats {
+    fn upgrade(&self, _wtxn: &mut RwTxn, _index: &Index, _progress: Progress) -> Result<bool> {
         // recompute the indexes stats
         Ok(true)
     }

-    fn target_version(&self) -> (u32, u32, u32) {
-        (1, 13, 0)
+    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
+        initial_version < (1, 13, 0)
+    }
+
+    fn description(&self) -> &'static str {
+        "Recomputing stats"
     }
 }
@@ -5,17 +5,10 @@ use crate::database_stats::DatabaseStats;
 use crate::progress::Progress;
 use crate::{make_enum_progress, Index, Result};

-#[allow(non_camel_case_types)]
-pub(super) struct V1_13_0_To_V1_13_1();
+pub(super) struct AddNewStats();

-impl UpgradeIndex for V1_13_0_To_V1_13_1 {
-    fn upgrade(
-        &self,
-        wtxn: &mut RwTxn,
-        index: &Index,
-        _original: (u32, u32, u32),
-        progress: Progress,
-    ) -> Result<bool> {
+impl UpgradeIndex for AddNewStats {
+    fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
         make_enum_progress! {
             enum DocumentsStats {
                 CreatingDocumentsStats,
@@ -30,26 +23,11 @@ impl UpgradeIndex for V1_13_0_To_V1_13_1 {
         Ok(true)
     }

-    fn target_version(&self) -> (u32, u32, u32) {
-        (1, 13, 1)
+    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
+        initial_version < (1, 13, 1)
     }
-}

-#[allow(non_camel_case_types)]
-pub(super) struct V1_13_1_To_Latest_V1_13();
-
-impl UpgradeIndex for V1_13_1_To_Latest_V1_13 {
-    fn upgrade(
-        &self,
-        _wtxn: &mut RwTxn,
-        _index: &Index,
-        _original: (u32, u32, u32),
-        _progress: Progress,
-    ) -> Result<bool> {
-        Ok(false)
-    }
-
-    fn target_version(&self) -> (u32, u32, u32) {
-        (1, 13, 3)
+    fn description(&self) -> &'static str {
+        "Computing newly introduced document stats"
     }
 }
@@ -5,17 +5,10 @@ use super::UpgradeIndex;
 use crate::progress::Progress;
 use crate::{make_enum_progress, Index, Result};

-#[allow(non_camel_case_types)]
-pub(super) struct Latest_V1_13_To_Latest_V1_14();
+pub(super) struct UpgradeArroyVersion();

-impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
-    fn upgrade(
-        &self,
-        wtxn: &mut RwTxn,
-        index: &Index,
-        _original: (u32, u32, u32),
-        progress: Progress,
-    ) -> Result<bool> {
+impl UpgradeIndex for UpgradeArroyVersion {
+    fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
         make_enum_progress! {
             enum VectorStore {
                 UpdateInternalVersions,
@@ -35,7 +28,11 @@ impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
         Ok(false)
     }

-    fn target_version(&self) -> (u32, u32, u32) {
-        (1, 14, 0)
+    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
+        initial_version < (1, 14, 0)
+    }
+
+    fn description(&self) -> &'static str {
+        "Updating vector store with an internal version"
     }
 }
@@ -7,25 +7,21 @@ use crate::progress::Progress;
 use crate::update::new::indexer::recompute_word_fst_from_word_docids_database;
 use crate::{Index, Result};

-#[allow(non_camel_case_types)]
-pub(super) struct Latest_V1_14_To_Latest_V1_15();
+pub(super) struct RecomputeWordFst();

-impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 {
-    fn upgrade(
-        &self,
-        wtxn: &mut RwTxn,
-        index: &Index,
-        _original: (u32, u32, u32),
-        progress: Progress,
-    ) -> Result<bool> {
+impl UpgradeIndex for RecomputeWordFst {
+    fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
         // Recompute the word FST from the word docids database.
         recompute_word_fst_from_word_docids_database(index, wtxn, &progress)?;

         Ok(false)
     }
+    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
+        initial_version < (1, 15, 0)
+    }

-    fn target_version(&self) -> (u32, u32, u32) {
-        (1, 15, 0)
+    fn description(&self) -> &'static str {
+        "Recomputing word FST from word docids database as it was wrong before v1.15.0"
     }
 }

@@ -6,17 +6,10 @@ use crate::progress::Progress;
 use crate::vector::db::{EmbedderInfo, EmbeddingStatus};
 use crate::{Index, InternalError, Result};

-#[allow(non_camel_case_types)]
-pub(super) struct Latest_V1_15_To_V1_16_0();
+pub(super) struct SwitchToMultimodal();

-impl UpgradeIndex for Latest_V1_15_To_V1_16_0 {
-    fn upgrade(
-        &self,
-        wtxn: &mut RwTxn,
-        index: &Index,
-        _original: (u32, u32, u32),
-        _progress: Progress,
-    ) -> Result<bool> {
+impl UpgradeIndex for SwitchToMultimodal {
+    fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, _progress: Progress) -> Result<bool> {
         let v1_15_indexing_configs = index
             .main
             .remap_types::<Str, SerdeJson<Vec<super::v1_15::IndexEmbeddingConfig>>>()
@@ -41,8 +34,11 @@ impl UpgradeIndex for Latest_V1_15_To_V1_16_0 {

         Ok(false)
     }
+    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
+        initial_version < (1, 16, 0)
+    }

-    fn target_version(&self) -> (u32, u32, u32) {
-        (1, 16, 0)
+    fn description(&self) -> &'static str {
+        "Migrating the database for multimodal support"
     }
 }
@@ -2,6 +2,7 @@ use candle_core::Tensor;
|
|||||||
use candle_nn::VarBuilder;
|
use candle_nn::VarBuilder;
|
||||||
use candle_transformers::models::bert::{BertModel, Config as BertConfig, DTYPE};
|
use candle_transformers::models::bert::{BertModel, Config as BertConfig, DTYPE};
|
||||||
 use candle_transformers::models::modernbert::{Config as ModernConfig, ModernBert};
+use candle_transformers::models::xlm_roberta::{Config as XlmRobertaConfig, XLMRobertaModel};
 // FIXME: currently we'll be using the hub to retrieve model, in the future we might want to embed it into Meilisearch itself
 use hf_hub::api::sync::Api;
 use hf_hub::{Repo, RepoType};
@@ -89,6 +90,7 @@ impl Default for EmbedderOptions {
 enum ModelKind {
     Bert(BertModel),
     Modern(ModernBert),
+    XlmRoberta(XLMRobertaModel),
 }
 
 /// Perform embedding of documents and queries
@@ -304,7 +306,8 @@ impl Embedder {
         };
 
         let is_modern = has_arch("modernbert");
-        tracing::debug!(is_modern, model_type, "detected HF architecture");
+        let is_xlm_roberta = has_arch("xlm-roberta") || has_arch("xlm_roberta");
+        tracing::debug!(is_modern, is_xlm_roberta, model_type, "detected HF architecture");
 
         let mut tokenizer = Tokenizer::from_file(&tokenizer_filename)
             .map_err(|inner| NewEmbedderError::open_tokenizer(tokenizer_filename, inner))?;
@@ -340,6 +343,18 @@ impl Embedder {
                 )
             })?;
             ModelKind::Modern(ModernBert::load(vb, &config).map_err(NewEmbedderError::load_model)?)
+        } else if is_xlm_roberta {
+            let config: XlmRobertaConfig = serde_json::from_str(&config_str).map_err(|inner| {
+                NewEmbedderError::deserialize_config(
+                    options.model.clone(),
+                    config_str.clone(),
+                    config_filename.clone(),
+                    inner,
+                )
+            })?;
+            ModelKind::XlmRoberta(
+                XLMRobertaModel::new(&config, vb).map_err(NewEmbedderError::load_model)?,
+            )
         } else {
             let config: BertConfig = serde_json::from_str(&config_str).map_err(|inner| {
                 NewEmbedderError::deserialize_config(
@@ -451,6 +466,19 @@ impl Embedder {
                 let mask = Tensor::stack(&[mask], 0).map_err(EmbedError::tensor_shape)?;
                 model.forward(&token_ids, &mask).map_err(EmbedError::model_forward)?
             }
+            ModelKind::XlmRoberta(model) => {
+                let mut mask_vec = tokens.get_attention_mask().to_vec();
+                if mask_vec.len() > self.max_len {
+                    mask_vec.truncate(self.max_len);
+                }
+                let mask = Tensor::new(mask_vec.as_slice(), &self.device)
+                    .map_err(EmbedError::tensor_shape)?;
+                let mask = Tensor::stack(&[mask], 0).map_err(EmbedError::tensor_shape)?;
+                let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
+                model
+                    .forward(&token_ids, &mask, &token_type_ids, None, None, None)
+                    .map_err(EmbedError::model_forward)?
+            }
         };
 
         let embedding = Self::pooling(embeddings, self.pooling)?;
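The new variant is wired in through the same `has_arch` check already used for ModernBERT. In the patch, `has_arch` is a closure built over the parsed Hugging Face `config.json`; the standalone sketch below only illustrates the kind of check involved, with an assumed signature and `serde_json` as the sole dependency. It is not the Meilisearch implementation.

```rust
use serde_json::Value;

/// Hypothetical helper: does this HF `config.json` declare the given architecture,
/// either via `model_type` or via the `architectures` array?
fn has_arch(config: &Value, arch: &str) -> bool {
    let in_model_type = config
        .get("model_type")
        .and_then(Value::as_str)
        .map_or(false, |t| t.eq_ignore_ascii_case(arch));
    let in_architectures = config
        .get("architectures")
        .and_then(Value::as_array)
        .map_or(false, |archs| {
            archs.iter().filter_map(Value::as_str).any(|a| a.to_lowercase().contains(arch))
        });
    in_model_type || in_architectures
}

fn main() {
    let config: Value = serde_json::from_str(
        r#"{ "model_type": "xlm-roberta", "architectures": ["XLMRobertaModel"] }"#,
    )
    .unwrap();
    // Mirrors the patch's double spelling: both "xlm-roberta" and "xlm_roberta" are accepted.
    assert!(has_arch(&config, "xlm-roberta") || has_arch(&config, "xlm_roberta"));
}
```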
@@ -67,7 +67,7 @@ impl<F> Embeddings<F> {
     ///
     /// If `embeddings.len() % self.dimension != 0`, then the append operation fails.
     pub fn append(&mut self, mut embeddings: Vec<F>) -> Result<(), Vec<F>> {
-        if embeddings.len() % self.dimension != 0 {
+        if !embeddings.len().is_multiple_of(self.dimension) {
             return Err(embeddings);
         }
         self.data.append(&mut embeddings);
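The `%`-to-`is_multiple_of` change is behavior-preserving for a non-zero `dimension`; `usize::is_multiple_of` (stabilized in recent Rust, around 1.87) additionally avoids the division-by-zero panic. A quick check of the equivalence:

```rust
fn main() {
    let dimension = 3usize;
    for len in [0usize, 3, 7, 9] {
        // Same predicate, two spellings.
        assert_eq!(len % dimension != 0, !len.is_multiple_of(dimension));
    }
    // Unlike `%`, `is_multiple_of` does not panic on a zero divisor:
    assert!(!7usize.is_multiple_of(0));
    assert!(0usize.is_multiple_of(0));
}
```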
@@ -1,5 +1,5 @@
 use hannoy::distances::{Cosine, Hamming};
-use hannoy::ItemId;
+use hannoy::{ItemId, Searched};
 use heed::{RoTxn, RwTxn, Unspecified};
 use ordered_float::OrderedFloat;
 use rand::SeedableRng as _;
@@ -974,7 +974,7 @@ impl VectorStore {
             }
 
             if let Some(mut ret) = searcher.by_item(rtxn, item)? {
-                results.append(&mut ret);
+                results.append(&mut ret.nns);
             }
         }
         results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
@@ -1028,10 +1028,9 @@ impl VectorStore {
                 searcher.candidates(filter);
             }
 
-            let (res, _degraded) =
-                &mut searcher
-                    .by_vector_with_cancellation(rtxn, vector, || time_budget.exceeded())?;
-            results.append(res);
+            let Searched { mut nns, did_cancel: _ } =
+                searcher.by_vector_with_cancellation(rtxn, vector, || time_budget.exceeded())?;
+            results.append(&mut nns);
         }
 
         results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
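hannoy's search entry points now hand back a `Searched` value instead of a `(results, degraded)` tuple. The sketch below uses stand-in types (field names taken from the patch, not from the crate's documentation) purely to show the destructuring pattern the new code relies on.

```rust
/// Stand-in for the shape the patch destructures; not hannoy's actual definition.
struct Searched {
    nns: Vec<(u32, f32)>,
    did_cancel: bool,
}

fn by_vector_with_cancellation(mut should_cancel: impl FnMut() -> bool) -> Searched {
    let mut nns = vec![(1, 0.12), (7, 0.34), (3, 0.56)];
    let did_cancel = should_cancel();
    if did_cancel {
        nns.truncate(1); // pretend the search stopped early
    }
    Searched { nns, did_cancel }
}

fn main() {
    let mut results: Vec<(u32, f32)> = Vec::new();
    // Same pattern as the patch: keep the neighbours, ignore the cancellation flag.
    let Searched { mut nns, did_cancel: _ } = by_vector_with_cancellation(|| false);
    results.append(&mut nns);
    results.sort_unstable_by(|a, b| a.1.total_cmp(&b.1));
    println!("{results:?}");
}
```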
@@ -10,3 +10,5 @@ serde_json = "1.0"
 clap = { version = "4.5.52", features = ["derive"] }
 anyhow = "1.0.100"
 utoipa = "5.4.0"
+reqwest = { version = "0.12", features = ["blocking"] }
+regex = "1.10"
@@ -1,21 +1,57 @@
+use std::borrow::Cow;
+use std::collections::HashMap;
 use std::path::PathBuf;
+use std::sync::LazyLock;
 
-use anyhow::Result;
+use anyhow::{Context, Result};
 use clap::Parser;
 use meilisearch::routes::MeilisearchApi;
+use regex::Regex;
+use serde_json::{json, Value};
 use utoipa::OpenApi;
 
+const HTTP_METHODS: &[&str] = &["get", "post", "put", "patch", "delete"];
+
+/// Language used in the documentation repository (contains the key mapping)
+const DOCS_LANG: &str = "cURL";
+
+/// Mapping of repository URLs to language names.
+/// The "cURL" entry is special: it contains the key mapping used to resolve sample IDs for all SDKs.
+const CODE_SAMPLES: &[(&str, &str)] = &[
+    ("https://raw.githubusercontent.com/meilisearch/documentation/refs/heads/main/.code-samples.meilisearch.yaml", "cURL"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-dotnet/refs/heads/main/.code-samples.meilisearch.yaml", "C#"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-dart/refs/heads/main/.code-samples.meilisearch.yaml", "Dart"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-go/refs/heads/main/.code-samples.meilisearch.yaml", "Go"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-java/refs/heads/main/.code-samples.meilisearch.yaml", "Java"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-js/refs/heads/main/.code-samples.meilisearch.yaml", "JS"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-php/refs/heads/main/.code-samples.meilisearch.yaml", "PHP"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-python/refs/heads/main/.code-samples.meilisearch.yaml", "Python"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-ruby/refs/heads/main/.code-samples.meilisearch.yaml", "Ruby"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-rust/refs/heads/main/.code-samples.meilisearch.yaml", "Rust"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-swift/refs/heads/main/.code-samples.meilisearch.yaml", "Swift"),
+];
+
+// Pre-compiled regex patterns
+static COMMENT_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"^#\s*([a-zA-Z0-9_]+)\s*$").unwrap());
+static CODE_START_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"^([a-zA-Z0-9_]+):\s*\|-\s*$").unwrap());
+
 #[derive(Parser)]
 #[command(name = "openapi-generator")]
 #[command(about = "Generate OpenAPI specification for Meilisearch")]
 struct Cli {
-    /// Output file path (default: meilisearch.json)
+    /// Output file path (default: meilisearch-openapi.json)
     #[arg(short, long, value_name = "FILE")]
     output: Option<PathBuf>,
 
     /// Pretty print the JSON output
     #[arg(short, long)]
     pretty: bool,
+
+    /// Skip fetching code samples (offline mode)
+    #[arg(long)]
+    no_code_samples: bool,
 }
 
 fn main() -> Result<()> {
@@ -24,14 +60,26 @@ fn main() -> Result<()> {
     // Generate the OpenAPI specification
     let openapi = MeilisearchApi::openapi();
 
+    // Convert to serde_json::Value for modification
+    let mut openapi_value: Value = serde_json::to_value(&openapi)?;
+
+    // Fetch and add code samples if not disabled
+    if !cli.no_code_samples {
+        let code_samples = fetch_all_code_samples()?;
+        add_code_samples_to_openapi(&mut openapi_value, &code_samples)?;
+    }
+
+    // Clean up null descriptions in tags
+    clean_null_descriptions(&mut openapi_value);
+
     // Determine output path
-    let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch.json"));
+    let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch-openapi.json"));
 
     // Serialize to JSON
     let json = if cli.pretty {
-        serde_json::to_string_pretty(&openapi)?
+        serde_json::to_string_pretty(&openapi_value)?
     } else {
-        serde_json::to_string(&openapi)?
+        serde_json::to_string(&openapi_value)?
     };
 
     // Write to file
@@ -41,3 +89,487 @@ fn main() -> Result<()> {
 
     Ok(())
 }
+
+/// Code sample for a specific language
+#[derive(Debug, Clone)]
+struct CodeSample {
+    lang: String,
+    source: String,
+}
+
+/// Fetch and parse code samples from all repositories
+/// Returns a map from key (e.g., "get_indexes") to a list of code samples for different languages
+fn fetch_all_code_samples() -> Result<HashMap<String, Vec<CodeSample>>> {
+    // First, fetch the documentation file (cURL) to get the key mapping
+    let (docs_url, _) = CODE_SAMPLES
+        .iter()
+        .find(|(_, lang)| *lang == DOCS_LANG)
+        .context("Documentation source not found in CODE_SAMPLES")?;
+
+    let docs_content = reqwest::blocking::get(*docs_url)
+        .context("Failed to fetch documentation code samples")?
+        .text()
+        .context("Failed to read documentation code samples response")?;
+
+    let key_to_sample_ids = parse_documentation_mapping(&docs_content);
+
+    // Fetch code samples from all sources
+    let mut all_samples: HashMap<String, Vec<CodeSample>> = HashMap::new();
+
+    for (url, lang) in CODE_SAMPLES {
+        // For cURL, reuse already fetched content; for SDKs, fetch from URL
+        let content: Cow<'_, str> = if *lang == DOCS_LANG {
+            Cow::Borrowed(&docs_content)
+        } else {
+            match reqwest::blocking::get(*url).and_then(|r| r.text()) {
+                Ok(text) => Cow::Owned(text),
+                Err(e) => {
+                    eprintln!("Warning: Failed to fetch code samples for {}: {}", lang, e);
+                    continue;
+                }
+            }
+        };
+
+        let sample_id_to_code = parse_code_samples(&content);
+        for (key, sample_ids) in &key_to_sample_ids {
+            for sample_id in sample_ids {
+                if let Some(source) = sample_id_to_code.get(sample_id) {
+                    all_samples.entry(key.clone()).or_default().push(CodeSample {
+                        lang: lang.to_string(),
+                        source: source.clone(),
+                    });
+                }
+            }
+        }
+    }
+
+    Ok(all_samples)
+}
+
+/// Parse the documentation file to create a mapping from keys (comment IDs) to sample IDs
+/// Returns: HashMap<key, Vec<sample_id>>
+fn parse_documentation_mapping(content: &str) -> HashMap<String, Vec<String>> {
+    let mut mapping: HashMap<String, Vec<String>> = HashMap::new();
+    let mut current_key: Option<String> = None;
+
+    for line in content.lines() {
+        // Check if this is a comment line defining a new key
+        if let Some(caps) = COMMENT_RE.captures(line) {
+            current_key = Some(caps[1].to_string());
+            continue;
+        }
+
+        // Check if this starts a new code block and extract the sample_id
+        if let Some(caps) = CODE_START_RE.captures(line) {
+            let sample_id = caps[1].to_string();
+
+            if let Some(ref key) = current_key {
+                // Only associate this sample_id with the current key if it follows the pattern {key}_N
+                // This prevents samples without a preceding comment from being incorrectly associated
+                if sample_id.starts_with(&format!("{}_", key)) {
+                    mapping.entry(key.clone()).or_default().push(sample_id);
+                } else {
+                    // Sample ID doesn't match the current key, reset current_key
+                    current_key = None;
+                }
+            }
+        }
+    }
+
+    mapping
+}
+
+/// State machine for parsing YAML code blocks
+struct YamlCodeBlockParser {
+    current_value: Vec<String>,
+    in_code_block: bool,
+    base_indent: Option<usize>,
+}
+
+impl YamlCodeBlockParser {
+    fn new() -> Self {
+        Self { current_value: Vec::new(), in_code_block: false, base_indent: None }
+    }
+
+    fn start_new_block(&mut self) {
+        self.current_value.clear();
+        self.in_code_block = true;
+        self.base_indent = None;
+    }
+
+    fn take_value(&mut self) -> Option<String> {
+        if self.current_value.is_empty() {
+            return None;
+        }
+        let value = self.current_value.join("\n").trim_end().to_string();
+        self.current_value.clear();
+        self.in_code_block = false;
+        self.base_indent = None;
+        Some(value)
+    }
+
+    fn process_line(&mut self, line: &str) {
+        if !self.in_code_block {
+            return;
+        }
+
+        // Empty line or line with only whitespace
+        if line.trim().is_empty() {
+            // Only add empty lines if we've already started collecting
+            if !self.current_value.is_empty() {
+                self.current_value.push(String::new());
+            }
+            return;
+        }
+
+        // Calculate indentation
+        let indent = line.len() - line.trim_start().len();
+
+        // Set base indent from first non-empty line
+        let base = *self.base_indent.get_or_insert(indent);
+
+        // If line has less indentation than base, we've exited the block
+        if indent < base {
+            self.in_code_block = false;
+            return;
+        }
+
+        // Remove base indentation and add to value
+        let dedented = line.get(base..).unwrap_or_else(|| line.trim_start());
+        self.current_value.push(dedented.to_string());
+    }
+}
+
+/// Parse a code samples YAML file
+/// Returns: HashMap<sample_id, code>
+fn parse_code_samples(content: &str) -> HashMap<String, String> {
+    let mut samples: HashMap<String, String> = HashMap::new();
+    let mut current_sample_id: Option<String> = None;
+    let mut parser = YamlCodeBlockParser::new();
+
+    for line in content.lines() {
+        // Ignore comment lines
+        if line.starts_with('#') {
+            continue;
+        }
+
+        // Check if this starts a new code block
+        if let Some(caps) = CODE_START_RE.captures(line) {
+            // Save previous sample if exists
+            if let Some(sample_id) = current_sample_id.take() {
+                if let Some(value) = parser.take_value() {
+                    samples.insert(sample_id, value);
+                }
+            }
+            current_sample_id = Some(caps[1].to_string());
+            parser.start_new_block();
+            continue;
+        }
+
+        if current_sample_id.is_some() {
+            parser.process_line(line);
+        }
+    }
+
+    // Don't forget the last sample
+    if let Some(sample_id) = current_sample_id {
+        if let Some(value) = parser.take_value() {
+            samples.insert(sample_id, value);
+        }
+    }
+
+    samples
+}
+
+/// Convert an OpenAPI path to a code sample key
+/// Path: /indexes/{index_uid}/documents/{document_id}
+/// Method: GET
+/// Key: get_indexes_indexUid_documents_documentId
+fn path_to_key(path: &str, method: &str) -> String {
+    let method_lower = method.to_lowercase();
+
+    // Remove leading slash and convert path
+    let path_part = path
+        .trim_start_matches('/')
+        .split('/')
+        .map(|segment| {
+            if segment.starts_with('{') && segment.ends_with('}') {
+                // Convert {param_name} to camelCase
+                let param = &segment[1..segment.len() - 1];
+                to_camel_case(param)
+            } else {
+                // Keep path segments as-is, but replace hyphens with underscores
+                segment.replace('-', "_")
+            }
+        })
+        .collect::<Vec<_>>()
+        .join("_");
+
+    if path_part.is_empty() {
+        method_lower
+    } else {
+        format!("{}_{}", method_lower, path_part)
+    }
+}
+
+/// Convert snake_case to camelCase
+fn to_camel_case(s: &str) -> String {
+    let mut result = String::with_capacity(s.len());
+    let mut capitalize_next = false;
+
+    for (i, c) in s.chars().enumerate() {
+        match c {
+            '_' => capitalize_next = true,
+            _ if capitalize_next => {
+                result.push(c.to_ascii_uppercase());
+                capitalize_next = false;
+            }
+            _ if i == 0 => result.push(c.to_ascii_lowercase()),
+            _ => result.push(c),
+        }
+    }
+
+    result
+}
+
+/// Add code samples to the OpenAPI specification
+fn add_code_samples_to_openapi(
+    openapi: &mut Value,
+    code_samples: &HashMap<String, Vec<CodeSample>>,
+) -> Result<()> {
+    let paths = openapi
+        .get_mut("paths")
+        .and_then(|p| p.as_object_mut())
+        .context("OpenAPI spec missing 'paths' object")?;
+
+    for (path, path_item) in paths.iter_mut() {
+        let Some(path_item) = path_item.as_object_mut() else {
+            continue;
+        };
+
+        for method in HTTP_METHODS {
+            let Some(operation) = path_item.get_mut(*method) else {
+                continue;
+            };
+
+            let key = path_to_key(path, method);
+
+            if let Some(samples) = code_samples.get(&key) {
+                // Create x-codeSamples array according to Redocly spec
+                // Sort by language name for consistent output
+                let mut sorted_samples = samples.clone();
+                sorted_samples.sort_by(|a, b| a.lang.cmp(&b.lang));
+
+                let code_sample_array: Vec<Value> = sorted_samples
+                    .iter()
+                    .map(|sample| {
+                        json!({
+                            "lang": sample.lang,
+                            "source": sample.source
+                        })
+                    })
+                    .collect();
+
+                if let Some(op) = operation.as_object_mut() {
+                    op.insert("x-codeSamples".to_string(), json!(code_sample_array));
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Clean up null descriptions in tags to make Mintlify work
+/// Removes any "description" fields with null values (both JSON null and "null" string)
+/// from the tags array and all nested objects
+fn clean_null_descriptions(openapi: &mut Value) {
+    if let Some(tags) = openapi.get_mut("tags").and_then(|t| t.as_array_mut()) {
+        for tag in tags.iter_mut() {
+            remove_null_descriptions_recursive(tag);
+        }
+    }
+}
+
+/// Recursively remove all "description" fields that are null or "null" string
+fn remove_null_descriptions_recursive(value: &mut Value) {
+    if let Some(obj) = value.as_object_mut() {
+        // Check and remove description if it's null or "null" string
+        if let Some(desc) = obj.get("description") {
+            if desc.is_null() || (desc.is_string() && desc.as_str() == Some("null")) {
+                obj.remove("description");
+            }
+        }
+
+        // Recursively process all nested objects
+        for (_, v) in obj.iter_mut() {
+            remove_null_descriptions_recursive(v);
+        }
+    } else if let Some(arr) = value.as_array_mut() {
+        // Recursively process arrays
+        for item in arr.iter_mut() {
+            remove_null_descriptions_recursive(item);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_path_to_key() {
+        assert_eq!(path_to_key("/indexes", "GET"), "get_indexes");
+        assert_eq!(path_to_key("/indexes/{index_uid}", "GET"), "get_indexes_indexUid");
+        assert_eq!(
+            path_to_key("/indexes/{index_uid}/documents", "POST"),
+            "post_indexes_indexUid_documents"
+        );
+        assert_eq!(
+            path_to_key("/indexes/{index_uid}/documents/{document_id}", "GET"),
+            "get_indexes_indexUid_documents_documentId"
+        );
+        assert_eq!(
+            path_to_key("/indexes/{index_uid}/settings/stop-words", "GET"),
+            "get_indexes_indexUid_settings_stop_words"
+        );
+    }
+
+    #[test]
+    fn test_to_camel_case() {
+        assert_eq!(to_camel_case("index_uid"), "indexUid");
+        assert_eq!(to_camel_case("document_id"), "documentId");
+        assert_eq!(to_camel_case("task_uid"), "taskUid");
+    }
+
+    #[test]
+    fn test_parse_documentation_mapping() {
+        let yaml = r#"
+# get_indexes
+get_indexes_1: |-
+  curl \
+    -X GET 'MEILISEARCH_URL/indexes'
+get_indexes_2: |-
+  curl \
+    -X GET 'MEILISEARCH_URL/indexes?limit=5'
+# post_indexes
+post_indexes_1: |-
+  curl \
+    -X POST 'MEILISEARCH_URL/indexes'
+post_indexes_2: |-
+  curl \
+    -X POST 'MEILISEARCH_URL/indexes'
+# get_version
+get_version_1: |-
+  curl \
+    -X GET 'MEILISEARCH_URL/version'
+# COMMENT WITHOUT KEY - SHOULD BE IGNORED
+## COMMENT WITHOUT KEY - SHOULD BE IGNORED
+unrelated_sample_without_comment: |-
+  curl \
+    -X GET 'MEILISEARCH_URL/something'
+"#;
+        let mapping = parse_documentation_mapping(yaml);
+
+        assert_eq!(mapping.len(), 3);
+        assert!(mapping.contains_key("get_indexes"));
+        assert!(mapping.contains_key("post_indexes"));
+        assert!(mapping.contains_key("get_version"));
+        assert_eq!(mapping["get_indexes"], vec!["get_indexes_1", "get_indexes_2"]);
+        assert_eq!(mapping["post_indexes"], vec!["post_indexes_1", "post_indexes_2"]);
+        assert_eq!(mapping["get_version"], vec!["get_version_1"]);
+        // unrelated_sample_without_comment should not be in the mapping
+        assert!(!mapping.values().any(|v| v.contains(&"unrelated_sample_without_comment".to_string())));
+        // Comments with multiple words or ## should be ignored and not create keys
+        assert!(!mapping.contains_key("COMMENT"));
+        assert!(!mapping.contains_key("##"));
+    }
+
+    #[test]
+    fn test_parse_code_samples() {
+        let yaml = r#"
+# This is a comment that should be ignored
+list_all_indexes_1: |-
+  const client = new MeiliSearch({
+    host: 'http://localhost:7700',
+    apiKey: 'masterKey'
+  });
+
+  const response = await client.getIndexes();
+
+# Another comment
+create_an_index_1: |-
+  const task = await client.createIndex('movies');
+"#;
+        let samples = parse_code_samples(yaml);
+
+        assert_eq!(samples.len(), 2);
+        assert!(samples.contains_key("list_all_indexes_1"));
+        assert!(samples.contains_key("create_an_index_1"));
+        assert!(samples["list_all_indexes_1"].contains("getIndexes"));
+        assert!(samples["create_an_index_1"].contains("createIndex"));
+    }
+
+    #[test]
+    fn test_clean_null_descriptions() {
+        let mut openapi = json!({
+            "tags": [
+                {
+                    "name": "Test1",
+                    "description": "null"
+                },
+                {
+                    "name": "Test2",
+                    "description": null
+                },
+                {
+                    "name": "Test3",
+                    "description": "Valid description"
+                },
+                {
+                    "name": "Test4",
+                    "description": "null",
+                    "externalDocs": {
+                        "url": "https://example.com",
+                        "description": null
+                    }
+                },
+                {
+                    "name": "Test5",
+                    "externalDocs": {
+                        "url": "https://example.com",
+                        "description": "null"
+                    }
+                }
+            ]
+        });
+
+        clean_null_descriptions(&mut openapi);
+
+        let tags = openapi["tags"].as_array().unwrap();
+
+        // Test1: description "null" should be removed
+        assert!(!tags[0].as_object().unwrap().contains_key("description"));
+
+        // Test2: description null should be removed
+        assert!(!tags[1].as_object().unwrap().contains_key("description"));
+
+        // Test3: valid description should remain
+        assert_eq!(tags[2]["description"], "Valid description");
+
+        // Test4: both tag description and externalDocs description should be removed
+        assert!(!tags[3].as_object().unwrap().contains_key("description"));
+        assert!(!tags[3]["externalDocs"]
+            .as_object()
+            .unwrap()
+            .contains_key("description"));
+        assert_eq!(tags[3]["externalDocs"]["url"], "https://example.com");
+
+        // Test5: externalDocs description "null" should be removed
+        assert!(!tags[4]["externalDocs"]
+            .as_object()
+            .unwrap()
+            .contains_key("description"));
+        assert_eq!(tags[4]["externalDocs"]["url"], "https://example.com");
+    }
+}
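For reference, each operation that has matching samples ends up with a Redocly-style `x-codeSamples` array of `{lang, source}` objects (the diff's own comment points at that spec). The snippet below hand-builds one such object with `serde_json`; the summary and sources are invented for illustration, only the extension shape matches what the generator writes.

```rust
use serde_json::json;

fn main() {
    // Illustrative operation: real sources come from the SDKs'
    // .code-samples.meilisearch.yaml files, keyed via path_to_key(path, method).
    let operation = json!({
        "summary": "List indexes",
        "x-codeSamples": [
            { "lang": "JS", "source": "const indexes = await client.getIndexes();" },
            { "lang": "cURL", "source": "curl -X GET 'MEILISEARCH_URL/indexes'" }
        ]
    });
    println!("{}", serde_json::to_string_pretty(&operation).unwrap());
}
```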
@@ -22,6 +22,7 @@ reqwest = { version = "0.12.24", features = [
     "json",
     "rustls-tls",
 ], default-features = false }
+semver = "1.0.27"
 serde = { version = "1.0.228", features = ["derive"] }
 serde_json = "1.0.145"
 sha2 = "0.10.9"
@@ -42,3 +43,4 @@ tracing = "0.1.41"
 tracing-subscriber = "0.3.20"
 tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
 uuid = { version = "1.18.1", features = ["v7", "serde"] }
+similar-asserts = "1.7.0"
@@ -1,194 +0,0 @@
-use std::collections::BTreeMap;
-use std::fmt::Display;
-use std::io::Read as _;
-
-use anyhow::{bail, Context as _};
-use serde::Deserialize;
-
-use super::assets::{fetch_asset, Asset};
-use super::client::{Client, Method};
-
-#[derive(Clone, Deserialize)]
-pub struct Command {
-    pub route: String,
-    pub method: Method,
-    #[serde(default)]
-    pub body: Body,
-    #[serde(default)]
-    pub synchronous: SyncMode,
-}
-
-#[derive(Default, Clone, Deserialize)]
-#[serde(untagged)]
-pub enum Body {
-    Inline {
-        inline: serde_json::Value,
-    },
-    Asset {
-        asset: String,
-    },
-    #[default]
-    Empty,
-}
-
-impl Body {
-    pub fn get(
-        self,
-        assets: &BTreeMap<String, Asset>,
-        asset_folder: &str,
-    ) -> anyhow::Result<Option<(Vec<u8>, &'static str)>> {
-        Ok(match self {
-            Body::Inline { inline: body } => Some((
-                serde_json::to_vec(&body)
-                    .context("serializing to bytes")
-                    .context("while getting inline body")?,
-                "application/json",
-            )),
-            Body::Asset { asset: name } => Some({
-                let context = || format!("while getting body from asset '{name}'");
-                let (mut file, format) =
-                    fetch_asset(&name, assets, asset_folder).with_context(context)?;
-                let mut buf = Vec::new();
-                file.read_to_end(&mut buf).with_context(context)?;
-                (buf, format.to_content_type(&name))
-            }),
-            Body::Empty => None,
-        })
-    }
-}
-
-impl Display for Command {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:?} {} ({:?})", self.method, self.route, self.synchronous)
-    }
-}
-
-#[derive(Default, Debug, Clone, Copy, Deserialize)]
-pub enum SyncMode {
-    DontWait,
-    #[default]
-    WaitForResponse,
-    WaitForTask,
-}
-
-pub async fn run_batch(
-    client: &Client,
-    batch: &[Command],
-    assets: &BTreeMap<String, Asset>,
-    asset_folder: &str,
-) -> anyhow::Result<()> {
-    let [.., last] = batch else { return Ok(()) };
-    let sync = last.synchronous;
-
-    let mut tasks = tokio::task::JoinSet::new();
-
-    for command in batch {
-        // FIXME: you probably don't want to copy assets everytime here
-        tasks.spawn({
-            let client = client.clone();
-            let command = command.clone();
-            let assets = assets.clone();
-            let asset_folder = asset_folder.to_owned();
-
-            async move { run(client, command, &assets, &asset_folder).await }
-        });
-    }
-
-    while let Some(result) = tasks.join_next().await {
-        result
-            .context("panicked while executing command")?
-            .context("error while executing command")?;
-    }
-
-    match sync {
-        SyncMode::DontWait => {}
-        SyncMode::WaitForResponse => {}
-        SyncMode::WaitForTask => wait_for_tasks(client).await?,
-    }
-
-    Ok(())
-}
-
-async fn wait_for_tasks(client: &Client) -> anyhow::Result<()> {
-    loop {
-        let response = client
-            .get("tasks?statuses=enqueued,processing")
-            .send()
-            .await
-            .context("could not wait for tasks")?;
-        let response: serde_json::Value = response
-            .json()
-            .await
-            .context("could not deserialize response to JSON")
-            .context("could not wait for tasks")?;
-        match response.get("total") {
-            Some(serde_json::Value::Number(number)) => {
-                let number = number.as_u64().with_context(|| {
-                    format!("waiting for tasks: could not parse 'total' as integer, got {}", number)
-                })?;
-                if number == 0 {
-                    break;
-                } else {
-                    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
-                    continue;
-                }
-            }
-            Some(thing_else) => {
-                bail!(format!(
-                    "waiting for tasks: could not parse 'total' as a number, got '{thing_else}'"
-                ))
-            }
-            None => {
-                bail!(format!(
-                    "waiting for tasks: expected response to contain 'total', got '{response}'"
-                ))
-            }
-        }
-    }
-    Ok(())
-}
-
-#[tracing::instrument(skip(client, command, assets, asset_folder), fields(command = %command))]
-pub async fn run(
-    client: Client,
-    mut command: Command,
-    assets: &BTreeMap<String, Asset>,
-    asset_folder: &str,
-) -> anyhow::Result<()> {
-    // memtake the body here to leave an empty body in its place, so that command is not partially moved-out
-    let body = std::mem::take(&mut command.body)
-        .get(assets, asset_folder)
-        .with_context(|| format!("while getting body for command {command}"))?;
-
-    let request = client.request(command.method.into(), &command.route);
-
-    let request = if let Some((body, content_type)) = body {
-        request.body(body).header(reqwest::header::CONTENT_TYPE, content_type)
-    } else {
-        request
-    };
-
-    let response =
-        request.send().await.with_context(|| format!("error sending command: {}", command))?;
-
-    let code = response.status();
-    if code.is_client_error() {
-        tracing::error!(%command, %code, "error in workload file");
-        let response: serde_json::Value = response
-            .json()
-            .await
-            .context("could not deserialize response as JSON")
-            .context("parsing error in workload file when sending command")?;
-        bail!("error in workload file: server responded with error code {code} and '{response}'")
-    } else if code.is_server_error() {
-        tracing::error!(%command, %code, "server error");
-        let response: serde_json::Value = response
-            .json()
-            .await
-            .context("could not deserialize response as JSON")
-            .context("parsing server error when sending command")?;
-        bail!("server error: server responded with error code {code} and '{response}'")
-    }
-
-    Ok(())
-}
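The deleted `run_batch` relied on its callers slicing the command list with `split_inclusive`, so each batch ends at the first command that must be awaited. A minimal, dependency-free sketch of that batching idea (the `SyncMode` enum is copied from the removed file; the command list is made up):

```rust
#[derive(Debug, Clone, Copy)]
enum SyncMode {
    DontWait,
    WaitForResponse,
    WaitForTask,
}

fn main() {
    // Only the sync mode matters for batching in this sketch.
    let commands = [
        SyncMode::DontWait,
        SyncMode::DontWait,
        SyncMode::WaitForTask,
        SyncMode::WaitForResponse,
    ];

    // A batch ends at the first command that is *not* DontWait,
    // i.e. the first one the runner has to wait on.
    for batch in commands.split_inclusive(|mode| !matches!(mode, SyncMode::DontWait)) {
        println!("spawn concurrently, then wait: {batch:?}");
    }
}
```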
@@ -7,9 +7,9 @@ use tokio::task::AbortHandle;
 use tracing_trace::processor::span_stats::CallStats;
 use uuid::Uuid;
 
-use super::client::Client;
 use super::env_info;
-use super::workload::Workload;
+use super::workload::BenchWorkload;
+use crate::common::client::Client;
 
 #[derive(Debug, Clone)]
 pub enum DashboardClient {
@@ -89,7 +89,7 @@ impl DashboardClient {
     pub async fn create_workload(
         &self,
         invocation_uuid: Uuid,
-        workload: &Workload,
+        workload: &BenchWorkload,
     ) -> anyhow::Result<Uuid> {
         let Self::Client(dashboard_client) = self else { return Ok(Uuid::now_v7()) };
 
@@ -1,51 +1,36 @@
-mod assets;
-mod client;
-mod command;
 mod dashboard;
 mod env_info;
-mod meili_process;
 mod workload;
 
-use std::io::LineWriter;
-use std::path::PathBuf;
+use crate::common::args::CommonArgs;
+use crate::common::logs::setup_logs;
+use crate::common::workload::Workload;
+use std::{path::PathBuf, sync::Arc};
 
-use anyhow::Context;
+use anyhow::{bail, Context};
 use clap::Parser;
-use tracing_subscriber::fmt::format::FmtSpan;
-use tracing_subscriber::layer::SubscriberExt;
-use tracing_subscriber::Layer;
 
-use self::client::Client;
-use self::workload::Workload;
+use crate::common::client::Client;
+pub use workload::BenchWorkload;
 
-pub fn default_http_addr() -> String {
-    "127.0.0.1:7700".to_string()
-}
 pub fn default_report_folder() -> String {
     "./bench/reports/".into()
 }
 
-pub fn default_asset_folder() -> String {
-    "./bench/assets/".into()
-}
-
-pub fn default_log_filter() -> String {
-    "info".into()
-}
-
 pub fn default_dashboard_url() -> String {
     "http://localhost:9001".into()
 }
 
 /// Run benchmarks from a workload
 #[derive(Parser, Debug)]
-pub struct BenchDeriveArgs {
-    /// Filename of the workload file, pass multiple filenames
-    /// to run multiple workloads in the specified order.
-    ///
-    /// Each workload run will get its own report file.
-    #[arg(value_name = "WORKLOAD_FILE", last = false)]
-    workload_file: Vec<PathBuf>,
+pub struct BenchArgs {
+    /// Common arguments shared with other commands
+    #[command(flatten)]
+    common: CommonArgs,
+
+    /// Meilisearch master keys
+    #[arg(long)]
+    pub master_key: Option<String>,
 
     /// URL of the dashboard.
     #[arg(long, default_value_t = default_dashboard_url())]
@@ -59,34 +44,14 @@ pub struct BenchDeriveArgs {
     #[arg(long, default_value_t = default_report_folder())]
     report_folder: String,
 
-    /// Directory to store the remote assets.
-    #[arg(long, default_value_t = default_asset_folder())]
-    asset_folder: String,
-
-    /// Log directives
-    #[arg(short, long, default_value_t = default_log_filter())]
-    log_filter: String,
-
     /// Benchmark dashboard API key
     #[arg(long)]
     api_key: Option<String>,
 
-    /// Meilisearch master keys
-    #[arg(long)]
-    master_key: Option<String>,
-
-    /// Authentication bearer for fetching assets
-    #[arg(long)]
-    assets_key: Option<String>,
-
     /// Reason for the benchmark invocation
     #[arg(short, long)]
     reason: Option<String>,
 
-    /// The maximum time in seconds we allow for fetching the task queue before timing out.
-    #[arg(long, default_value_t = 60)]
-    tasks_queue_timeout_secs: u64,
-
     /// The path to the binary to run.
     ///
     /// If unspecified, runs `cargo run` after building Meilisearch with `cargo build`.
@@ -94,18 +59,8 @@ pub struct BenchDeriveArgs {
     binary_path: Option<PathBuf>,
 }
 
-pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
-    // setup logs
-    let filter: tracing_subscriber::filter::Targets =
-        args.log_filter.parse().context("invalid --log-filter")?;
-
-    let subscriber = tracing_subscriber::registry().with(
-        tracing_subscriber::fmt::layer()
-            .with_writer(|| LineWriter::new(std::io::stderr()))
-            .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE)
-            .with_filter(filter),
-    );
-    tracing::subscriber::set_global_default(subscriber).context("could not setup logging")?;
+pub fn run(args: BenchArgs) -> anyhow::Result<()> {
+    setup_logs(&args.common.log_filter)?;
 
     // fetch environment and build info
     let env = env_info::Environment::generate_from_current_config();
@@ -116,8 +71,11 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
     let _scope = rt.enter();
 
     // setup clients
-    let assets_client =
-        Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h
+    let assets_client = Client::new(
+        None,
+        args.common.assets_key.as_deref(),
+        Some(std::time::Duration::from_secs(3600)), // 1h
+    )?;
 
     let dashboard_client = if args.no_dashboard {
         dashboard::DashboardClient::new_dry()
@@ -134,11 +92,11 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
         None,
     )?;
 
-    let meili_client = Client::new(
+    let meili_client = Arc::new(Client::new(
         Some("http://127.0.0.1:7700".into()),
         args.master_key.as_deref(),
-        Some(std::time::Duration::from_secs(args.tasks_queue_timeout_secs)),
-    )?;
+        Some(std::time::Duration::from_secs(args.common.tasks_queue_timeout_secs)),
+    )?);
 
     // enter runtime
 
@@ -146,11 +104,11 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
         dashboard_client.send_machine_info(&env).await?;
 
         let commit_message = build_info.commit_msg.unwrap_or_default().split('\n').next().unwrap();
-        let max_workloads = args.workload_file.len();
+        let max_workloads = args.common.workload_file.len();
        let reason: Option<&str> = args.reason.as_deref();
         let invocation_uuid = dashboard_client.create_invocation(build_info.clone(), commit_message, env, max_workloads, reason).await?;
 
-        tracing::info!(workload_count = args.workload_file.len(), "handling workload files");
+        tracing::info!(workload_count = args.common.workload_file.len(), "handling workload files");
 
         // main task
         let workload_runs = tokio::spawn(
@@ -158,13 +116,17 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
             let dashboard_client = dashboard_client.clone();
             let mut dashboard_urls = Vec::new();
             async move {
-                for workload_file in args.workload_file.iter() {
+                for workload_file in args.common.workload_file.iter() {
                     let workload: Workload = serde_json::from_reader(
                         std::fs::File::open(workload_file)
                             .with_context(|| format!("error opening {}", workload_file.display()))?,
                     )
                     .with_context(|| format!("error parsing {} as JSON", workload_file.display()))?;
 
+                    let Workload::Bench(workload) = workload else {
+                        bail!("workload file {} is not a bench workload", workload_file.display());
+                    };
+
                     let workload_name = workload.name.clone();
 
                     workload::execute(
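The runner now deserializes a shared `Workload` type and refuses anything that is not the bench variant. The real enum lives in `crate::common::workload` and is not shown in this diff, so the sketch below uses an assumed two-variant shape purely to demonstrate the `let`-`else` guard:

```rust
use anyhow::{bail, Result};

struct BenchWorkload {
    name: String,
}

/// Assumed stand-in for `crate::common::workload::Workload`.
enum Workload {
    Bench(BenchWorkload),
    Other,
}

fn handle(workload: Workload, path: &str) -> Result<()> {
    // Same guard as the new runner: bail on non-bench workload files.
    let Workload::Bench(workload) = workload else {
        bail!("workload file {path} is not a bench workload");
    };
    println!("running bench workload {}", workload.name);
    Ok(())
}

fn main() -> Result<()> {
    handle(Workload::Bench(BenchWorkload { name: "example".into() }), "bench/example.json")?;
    assert!(handle(Workload::Other, "tests/example.json").is_err());
    Ok(())
}
```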
@@ -1,24 +1,28 @@
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashMap};
 use std::fs::File;
 use std::io::{Seek as _, Write as _};
 use std::path::Path;
+use std::sync::Arc;
 
 use anyhow::{bail, Context as _};
 use futures_util::TryStreamExt as _;
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
 use serde_json::json;
 use tokio::task::JoinHandle;
 use uuid::Uuid;
 
-use super::assets::Asset;
-use super::client::Client;
-use super::command::SyncMode;
 use super::dashboard::DashboardClient;
-use super::BenchDeriveArgs;
-use crate::bench::{assets, meili_process};
+use super::BenchArgs;
+use crate::common::assets::{self, Asset};
+use crate::common::client::Client;
+use crate::common::command::{run_commands, Command};
+use crate::common::instance::Binary;
+use crate::common::process::{self, delete_db, start_meili};
 
-#[derive(Deserialize)]
-pub struct Workload {
+/// A bench workload.
+/// Not to be confused with [a test workload](crate::test::workload::Workload).
+#[derive(Serialize, Deserialize, Debug)]
+pub struct BenchWorkload {
     pub name: String,
     pub run_count: u16,
     pub extra_cli_args: Vec<String>,
@@ -26,30 +30,34 @@ pub struct Workload {
     #[serde(default)]
     pub target: String,
     #[serde(default)]
-    pub precommands: Vec<super::command::Command>,
-    pub commands: Vec<super::command::Command>,
+    pub precommands: Vec<Command>,
+    pub commands: Vec<Command>,
 }
 
-async fn run_commands(
+async fn run_workload_commands(
     dashboard_client: &DashboardClient,
     logs_client: &Client,
-    meili_client: &Client,
+    meili_client: &Arc<Client>,
     workload_uuid: Uuid,
-    workload: &Workload,
-    args: &BenchDeriveArgs,
+    workload: &BenchWorkload,
+    args: &BenchArgs,
     run_number: u16,
 ) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
     let report_folder = &args.report_folder;
     let workload_name = &workload.name;
+    let assets = Arc::new(workload.assets.clone());
+    let asset_folder = args.common.asset_folder.clone().leak();
 
-    for batch in workload
-        .precommands
-        .as_slice()
-        .split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait))
-    {
-        super::command::run_batch(meili_client, batch, &workload.assets, &args.asset_folder)
-            .await?;
-    }
+    run_commands(
+        meili_client,
+        &workload.precommands,
+        0,
+        &assets,
+        asset_folder,
+        &mut HashMap::new(),
+        false,
+    )
+    .await?;
 
     std::fs::create_dir_all(report_folder)
         .with_context(|| format!("could not create report directory at {report_folder}"))?;
@@ -59,14 +67,16 @@ async fn run_commands(
 
     let report_handle = start_report(logs_client, trace_filename, &workload.target).await?;
 
-    for batch in workload
-        .commands
-        .as_slice()
-        .split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait))
-    {
-        super::command::run_batch(meili_client, batch, &workload.assets, &args.asset_folder)
-            .await?;
-    }
+    run_commands(
+        meili_client,
+        &workload.commands,
+        0,
+        &assets,
+        asset_folder,
+        &mut HashMap::new(),
+        false,
+    )
+    .await?;
 
     let processor =
         stop_report(dashboard_client, logs_client, workload_uuid, report_filename, report_handle)
@@ -81,14 +91,14 @@ pub async fn execute(
     assets_client: &Client,
     dashboard_client: &DashboardClient,
     logs_client: &Client,
-    meili_client: &Client,
+    meili_client: &Arc<Client>,
     invocation_uuid: Uuid,
     master_key: Option<&str>,
-    workload: Workload,
-    args: &BenchDeriveArgs,
+    workload: BenchWorkload,
+    args: &BenchArgs,
     binary_path: Option<&Path>,
 ) -> anyhow::Result<()> {
-    assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;
+    assets::fetch_assets(assets_client, &workload.assets, &args.common.asset_folder).await?;
 
     let workload_uuid = dashboard_client.create_workload(invocation_uuid, &workload).await?;
 
@@ -129,38 +139,33 @@ pub async fn execute(
 async fn execute_run(
     dashboard_client: &DashboardClient,
     logs_client: &Client,
-    meili_client: &Client,
+    meili_client: &Arc<Client>,
     workload_uuid: Uuid,
     master_key: Option<&str>,
-    workload: &Workload,
-    args: &BenchDeriveArgs,
+    workload: &BenchWorkload,
+    args: &BenchArgs,
     binary_path: Option<&Path>,
     run_number: u16,
 ) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
-    meili_process::delete_db();
+    delete_db().await;
 
-    let run_command = match binary_path {
-        Some(binary_path) => tokio::process::Command::new(binary_path),
-        None => {
-            meili_process::build().await?;
-            let mut command = tokio::process::Command::new("cargo");
-            command
-                .arg("run")
-                .arg("--release")
-                .arg("-p")
-                .arg("meilisearch")
-                .arg("--bin")
-                .arg("meilisearch")
-                .arg("--");
-            command
-        }
+    let binary = match binary_path {
+        Some(binary_path) => Binary {
+            source: crate::common::instance::BinarySource::Path(binary_path.to_owned()),
+            extra_cli_args: workload.extra_cli_args.clone(),
+        },
+        None => Binary {
+            source: crate::common::instance::BinarySource::Build {
+                edition: crate::common::instance::Edition::Community,
+            },
+            extra_cli_args: workload.extra_cli_args.clone(),
+        },
     };
 
     let meilisearch =
-        meili_process::start(meili_client, master_key, workload, &args.asset_folder, run_command)
-            .await?;
+        start_meili(meili_client, master_key, &binary, &args.common.asset_folder).await?;
 
-    let processor = run_commands(
+    let processor = run_workload_commands(
         dashboard_client,
         logs_client,
         meili_client,
@@ -171,7 +176,7 @@ async fn execute_run(
     )
     .await?;
 
-    meili_process::kill(meilisearch).await;
+    process::kill_meili(meilisearch).await;
 
     tracing::info!(run_number, "Successful run");
 
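`execute_run` no longer builds a `tokio::process::Command` inline; it now describes the binary declaratively and hands it to `start_meili`. The stand-in types below are assumed shapes inferred from how the diff constructs them, shown only to illustrate the path-vs-build decision:

```rust
use std::path::{Path, PathBuf};

#[derive(Debug)]
enum Edition {
    Community,
}

#[derive(Debug)]
enum BinarySource {
    Path(PathBuf),
    Build { edition: Edition },
}

#[derive(Debug)]
struct Binary {
    source: BinarySource,
    extra_cli_args: Vec<String>,
}

fn describe(binary_path: Option<&Path>, extra_cli_args: Vec<String>) -> Binary {
    // Mirrors the new execute_run: an explicit path wins, otherwise build from source.
    match binary_path {
        Some(path) => Binary { source: BinarySource::Path(path.to_owned()), extra_cli_args },
        None => {
            Binary { source: BinarySource::Build { edition: Edition::Community }, extra_cli_args }
        }
    }
}

fn main() {
    let from_path = describe(Some(Path::new("./meilisearch")), vec![]);
    let from_build = describe(None, vec!["--http-addr=127.0.0.1:7701".into()]); // example arg only
    println!("{from_path:?}\n{from_build:?}");
}
```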