Compare commits

...

149 Commits

Author SHA1 Message Date
Kerollmops
e80a866b38 Implement the intersection_with_serialized method 2025-12-15 09:24:38 +01:00
Kerollmops
175d3dec43 Remove unused wrapper methods 2025-12-14 13:35:39 +01:00
Kerollmops
9a01a718ff Fix some error handling 2025-12-13 12:03:50 +01:00
Kerollmops
b8a3decbc5 Implement merge_into 2025-12-13 12:03:21 +01:00
Kerollmops
3d8c701f42 Fix a logic bug when serializing delta-encoded bitmaps 2025-12-13 12:01:30 +01:00
Kerollmops
1c9b714a00 Use the DeCboRoaringBitmapCodec everywhere 2025-12-12 14:37:24 +01:00
Kerollmops
8f30397c16 WIP hide the CboRoaringBitmapCodec to replace it more conveniently 2025-12-12 11:56:29 +01:00
Kerollmops
46a0899979 Expose a CLI parameter to enable or disable delta-encoding of bitmaps 2025-12-12 11:56:28 +01:00
Kerollmops
7728d4bcfa Use unsafe blocks to set env variables 2025-12-12 11:56:28 +01:00
Kerollmops
c805094243 Support conditionally serializing with new format 2025-12-12 11:56:28 +01:00
Kerollmops
dd78d65236 Simplify the computation of the raw u32s bytes size 2025-12-12 11:56:28 +01:00
Clément Renault
fb1a24cef9 WIP missing DeCboRoaringBitmapCodec::bytes_encode implem 2025-12-12 11:56:28 +01:00
Kerollmops
2cdebc0d4b Remove the unwraps, asserts and return actual io errors 2025-12-12 11:56:28 +01:00
Kerollmops
40c90147ad Move to a two bytes header 2025-12-12 11:56:28 +01:00
Kerollmops
0cac701520 Introduce a first working version of the DeBitmapCodec 2025-12-12 11:56:28 +01:00
Clément Renault
26e368b116 Merge pull request #6041 from meilisearch/fix-workflow-injection
Remove risk of command injection
2025-12-09 17:04:58 +00:00
curquiza
ba95ac0915 Remove risk of command injection 2025-12-09 17:06:41 +01:00
Clément Renault
75fcbfc2fe Merge pull request #6039 from meilisearch/bump-rust-to-1-19-1
Move to Rust v1.91.1
2025-12-09 13:55:08 +00:00
Kerollmops
8c19b6d55e Make the new Clippy happy 2025-12-09 14:33:04 +01:00
Kerollmops
08d0f05ece Remove a warning 2025-12-09 13:58:37 +01:00
Kerollmops
4762e9afa0 Move to Rust v1.91.1 2025-12-09 13:52:46 +01:00
Clément Renault
12fcab91c5 Merge pull request #6037 from meilisearch/fix-intel-mac
Fix macos-amd64 compilation
2025-12-08 13:21:51 +00:00
Louis Dureuil
792a72a23f Add missing cfg 2025-12-08 13:22:01 +01:00
Louis Dureuil
2dd7f29edf Merge pull request #6034 from meilisearch/update-version-v1.29.0
Update version for the next release (v1.29.0) in Cargo.toml
2025-12-08 08:01:41 +00:00
dureuill
ff680d29a8 Update version for the next release (v1.29.0) in Cargo.toml 2025-12-04 16:24:56 +00:00
Clément Renault
00420dfca0 Merge pull request #6018 from qdequele/add-support-xlmrobertamodels
Add support for XLM Roberta models
2025-12-04 15:46:53 +00:00
Quentin de Quelen
a3a86ac629 chore: cargo fmt 2025-12-04 16:27:19 +01:00
Quentin de Quelen
f6210b8e5e add tests for the support of the models XLMRoberta 2025-12-04 16:27:19 +01:00
Quentin de Quelen
fe46af7ded add support of models XLMRoberta 2025-12-04 16:27:19 +01:00
Clément Renault
57b94b411f Merge pull request #6030 from meilisearch/require-git
Require git
2025-12-04 14:29:33 +00:00
Clément Renault
a7b6f65851 Merge pull request #6022 from meilisearch/xtask-generate-proto-name
Introduce xtask sub-command to generate prototypes
2025-12-04 13:53:20 +00:00
Louis Dureuil
1ec6646d8c Merge pull request #6029 from meilisearch/dumpless-upgrade-migrations
Switch to migration-oriented dumpless upgrade
2025-12-04 13:35:26 +00:00
Kerollmops
2dccacf273 Hide git fetch output 2025-12-04 14:35:03 +01:00
Kerollmops
ce0f04e9ee Improve the prototype guide 2025-12-04 14:35:03 +01:00
Kerollmops
9ba5c6d371 Update the prototype format 2025-12-04 14:35:03 +01:00
Kerollmops
56673fee56 Introduce the first working version of the tool 2025-12-04 14:35:03 +01:00
Clément Renault
b30bcbb931 Merge pull request #6032 from meilisearch/bump-hannoy
Bump hannoy to v0.1.0-nested-rtxns
2025-12-04 13:30:43 +00:00
Kerollmops
5fbe4436c8 Bump hannoy to v0.1.0-nested-rtxns 2025-12-04 14:06:45 +01:00
Louis Dureuil
8fa253c293 fmt 2025-12-04 13:55:28 +01:00
Louis Dureuil
4833da9edb Chore: remove some duplicated lambdas to ease compile time 2025-12-04 13:55:28 +01:00
Louis Dureuil
c0e31a4f01 Switch to migration-oriented dumpless upgrade 2025-12-04 13:55:28 +01:00
Louis Dureuil
c06ffb31d1 Update snapshots 2025-12-04 13:55:28 +01:00
Louis Dureuil
3097314b9d Make snapshots independent on the version 2025-12-04 13:55:27 +01:00
Louis Dureuil
786a978237 fmt 2025-12-04 13:52:57 +01:00
Louis Dureuil
03e53aaf6d Add binary to display build-info 2025-12-04 13:52:57 +01:00
Louis Dureuil
2206f045a4 replace git2 by the git command line in build-info 2025-12-04 13:52:56 +01:00
Louis Dureuil
246cf8b2d1 Mimic what is done for publish asset in the CI, for faster build 2025-12-04 13:52:56 +01:00
Louis Dureuil
82adabc5a0 Merge pull request #5861 from meilisearch/upgrade-tests
Declarative tests
2025-12-04 11:00:53 +00:00
Louis Dureuil
c9a22247d2 add hannoy test 2025-12-04 11:41:41 +01:00
Louis Dureuil
c535b8ddef Use variables to account for changes between local and CI 2025-12-04 09:47:37 +01:00
Louis Dureuil
8e89619aed Also evaluate variables in expected responses 2025-12-04 09:47:21 +01:00
Clément Renault
f617ca8e38 Merge pull request #6023 from meilisearch/curquiza-patch-1
Send notifications for Kubernetes integration when releasing
2025-12-04 07:00:50 +00:00
Louis Dureuil
959175ad2a switch to gh runner 2025-12-03 22:59:57 +01:00
Louis Dureuil
341ffbf5ef Modify bot message on db-change labeled PRs 2025-12-03 21:25:41 +01:00
Louis Dureuil
542f3073f4 Appease codeql 2025-12-03 21:25:41 +01:00
Louis Dureuil
0f134b079f hf-embed workload: add ranking scores 2025-12-03 21:25:41 +01:00
Louis Dureuil
9e7ae47355 Add missing sha 2025-12-03 21:25:41 +01:00
Louis Dureuil
1edf07df29 Add tests to CI 2025-12-03 21:25:40 +01:00
Louis Dureuil
88aa3cddde Support local builds of enterprise binaries 2025-12-03 21:25:40 +01:00
Louis Dureuil
e6846cb55a Rename and move the test instructions 2025-12-03 21:25:40 +01:00
Louis Dureuil
29b715e2f9 Update workloads 2025-12-03 21:25:40 +01:00
Louis Dureuil
f28dc5bd2b cleaning 2025-12-03 21:25:40 +01:00
Louis Dureuil
56d0b8ea54 Some cleaning 2025-12-03 21:25:40 +01:00
Louis Dureuil
514edb1b79 Add workloads 2025-12-03 21:25:40 +01:00
Louis Dureuil
cfb609d41d clippy 2025-12-03 21:25:40 +01:00
Louis Dureuil
11cb062067 fmt 2025-12-03 21:25:40 +01:00
Louis Dureuil
2ca4926ac5 Support editions, move to common 2025-12-03 21:25:40 +01:00
Louis Dureuil
834bd9b879 Fix uninitialization issue on unsupported platforms 2025-12-03 21:25:39 +01:00
Louis Dureuil
cac7e00983 Remove chrono 2025-12-03 21:25:39 +01:00
Mubelotix
e9300bac64 Add documentation 2025-12-03 21:25:39 +01:00
Mubelotix
b0da7864a4 Api key tests 2025-12-03 21:25:39 +01:00
Mubelotix
2b9d379feb Add variable registration mechanism 2025-12-03 21:25:39 +01:00
Mubelotix
8d585a04d4 Update movies workload 2025-12-03 21:25:39 +01:00
Mubelotix
0095a72fba Test for upgrade 2025-12-03 21:25:39 +01:00
Mubelotix
651339648c Fix processing time ms 2025-12-03 21:25:39 +01:00
Mubelotix
a489f4c172 Update issue template 2025-12-03 21:25:39 +01:00
Mubelotix
3b875ea00e Update movies 2025-12-03 21:25:39 +01:00
Mubelotix
9d269c499c Fix line feed at the end of files 2025-12-03 21:25:39 +01:00
Mubelotix
da35ae0a6e Update emojis 2025-12-03 21:25:38 +01:00
Mubelotix
61945b235d Add redaction system 2025-12-03 21:25:38 +01:00
Mubelotix
e936ac172d Fix compilation 2025-12-03 21:25:38 +01:00
Mubelotix
162a84cdbf Improve error detection 2025-12-03 21:25:38 +01:00
Mubelotix
92c63cf351 Improve diffing 2025-12-03 21:25:38 +01:00
Mubelotix
fca35b7476 Add upgrade system 2025-12-03 21:25:38 +01:00
Mubelotix
4056657a55 Refactor around meili_path 2025-12-03 21:25:38 +01:00
Mubelotix
685d227597 Move file to common 2025-12-03 21:25:38 +01:00
Mubelotix
49b9f6ff38 Remove useless data 2025-12-03 21:25:38 +01:00
Mubelotix
79d0a3fb97 Remove useless parameter 2025-12-03 21:25:38 +01:00
Mubelotix
313ef7e79b Add response updating logic 2025-12-03 21:25:37 +01:00
Mubelotix
256407be61 Fix asset version issues 2025-12-03 21:25:37 +01:00
Mubelotix
8b3943bd32 Do so that meilisearch versions get downloaded 2025-12-03 21:25:37 +01:00
Mubelotix
87b972d29a Implement test workload running logic 2025-12-03 21:25:37 +01:00
Mubelotix
09ab61b360 Continue integrating commands to tests 2025-12-03 21:25:37 +01:00
Mubelotix
2459f381b4 Remove dead code 2025-12-03 21:25:37 +01:00
Mubelotix
6442f02de4 Make commands common 2025-12-03 21:25:37 +01:00
Mubelotix
91c4d9ea79 Tag workloads 2025-12-03 21:25:37 +01:00
Mubelotix
92a4091da3 Create test workload 2025-12-03 21:25:37 +01:00
Mubelotix
29a337f0f9 Create the test function 2025-12-03 21:25:36 +01:00
Mubelotix
8c3cebadaa Create the test xtask command and args 2025-12-03 21:25:36 +01:00
Clément Renault
b566458aa2 Merge pull request #6027 from meilisearch/release-v1.28.2
Bring back changes from v1.28.2
2025-12-03 17:46:44 +00:00
Clément Renault
ae4344e359 Merge pull request #6004 from meilisearch/default-experimental-vector-store
Make Hannoy the default vector store
2025-12-03 17:16:46 +00:00
Kerollmops
b6cb384650 Fix settings tests 2025-12-03 17:52:52 +01:00
Clément Renault
2c3e3d856c Make hannoy the default vector store when creating an index 2025-12-03 17:52:52 +01:00
Clémentine
93e97f814c Add notifications for Kubernetes integration
Updated comments and conditions for notifying integration teams.
2025-12-03 17:49:46 +01:00
Kerollmops
e9350f033d Limit the number of retrieved task to one 2025-12-03 17:43:48 +01:00
Kerollmops
54c92fd6c0 Update the snapshots 2025-12-03 17:43:48 +01:00
Kerollmops
4f4df83a51 Bump the version to v1.28.2 2025-12-03 17:43:48 +01:00
Clément Renault
a51021cab7 Merge pull request #6026 from meilisearch/free-space
Fix the CI issues
2025-12-03 16:18:41 +00:00
Louis Dureuil
e33f4fdeae Attempt to eschew containers for ubuntu 2025-12-03 16:28:19 +01:00
Louis Dureuil
e407bca196 use feature as cache key 2025-12-03 16:24:48 +01:00
Louis Dureuil
cd24ea11b4 correctly clean space + remove test in debug 2025-12-03 16:12:08 +01:00
Louis Dureuil
ba578e7ab5 Fix ollama test following update on their side 2025-12-03 15:48:30 +01:00
Louis Dureuil
05a74d1e68 remove non-existing rust-toolchain action arguments 2025-12-03 15:37:51 +01:00
Louis Dureuil
41d61deb97 Make runners/containers more uniform 2025-12-03 15:34:57 +01:00
Louis Dureuil
bba292b01a Run ollama test on 22.04 2025-12-03 15:21:02 +01:00
Louis Dureuil
96923dff33 adjust test suite 2025-12-03 15:01:58 +01:00
Louis Dureuil
8f9c9305da set back the cache 2025-12-03 14:10:18 +01:00
Louis Dureuil
a9f309e1d1 Remove macos and windows from PRs 2025-12-03 13:54:02 +01:00
Louis Dureuil
e456a9acd8 Add the disk freeing to all ubuntu-22.04 jobs 2025-12-03 11:51:42 +01:00
Louis Dureuil
9b7d29466c Attempt to earn some free space... 2025-12-03 11:41:00 +01:00
Clément Renault
b0ef14b6f0 Merge pull request #5983 from meilisearch/new-searchable-settings-indexer
Support the searchable and exact attributes in the new Settings Indexer
2025-12-02 11:03:36 +00:00
Clément Renault
36febe2068 Merge pull request #6021 from meilisearch/skip-macos-windows-in-merge-queue
Skip the macOS and Windows CI in the merge queue
2025-12-02 08:29:06 +00:00
Kerollmops
6f14a6ec18 Skip the macOS and Windows CI in the merge queue 2025-12-01 16:59:55 +01:00
Kerollmops
fce046d84d Fix non-detected searchable attribute 2025-11-28 11:29:31 +01:00
Kerollmops
3fc507bb44 Introduce a test for when a new nested field becomes searchable 2025-11-28 11:29:31 +01:00
Kerollmops
fdbcd033fb Clean up the CI 2025-11-28 11:29:31 +01:00
Clément Renault
aaab49baca Fix a bug and improve code quality
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-28 11:29:31 +01:00
Kerollmops
0d0d6e8099 Update the proximity precision for the settings delta 2025-11-28 11:29:31 +01:00
Clément Renault
c1e351c92b Show available space 2025-11-28 11:29:31 +01:00
Clément Renault
67cab4cc9d Trigger the new settings indexer when changing the proximity precision 2025-11-28 11:29:31 +01:00
Clément Renault
f30a37b0fe Clear old word prefix fid docids entries when removing searchable fields 2025-11-28 11:29:31 +01:00
Clément Renault
a78a9f80dd Introduce the word pair proximity extractor 2025-11-28 11:29:31 +01:00
Clément Renault
439fee5434 Move the has_searchable_children function to the appropriate module 2025-11-28 11:29:31 +01:00
Clément Renault
9e858590e0 Rename the function to extract document words when a setting changes
Co-authored-By: Maxime Legendre <maxime@meilisearch.com>
2025-11-28 11:29:31 +01:00
Clément Renault
29eebd5f93 Merge the logic of the function detecting searchable children fields 2025-11-28 11:29:31 +01:00
Clément Renault
07da6edbdf Fix a bug when nested fields appear
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-28 11:29:31 +01:00
Clément Renault
22b83042e6 Add some comments
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-28 11:29:31 +01:00
Clément Renault
52ab13906a Fix a test trying to change settings with a wtxn 2025-11-28 11:29:31 +01:00
Clément Renault
29bec8efd4 Make sure the embedders supports changing searchables 2025-11-28 11:29:31 +01:00
Clément Renault
6947a8990b Make sure we don't crash on unreferenced fields 2025-11-28 11:29:31 +01:00
Clément Renault
fbb2bb0c73 Make clippy happy 2025-11-28 11:29:31 +01:00
Clément Renault
15918f53a9 Introduce new progress steps when deleting fid-based entries 2025-11-28 11:29:30 +01:00
Clément Renault
d7f5f3a0a3 Delete entries from fid-based databases when searchables are deleted 2025-11-28 11:29:30 +01:00
Clément Renault
1afbf35f27 Support exact attributes in the settings delta 2025-11-28 11:29:30 +01:00
Clément Renault
d7675233d5 Call the post processing in the new settings indexer 2025-11-28 11:29:30 +01:00
Clément Renault
c63c1ac32b Support exact attributes in the field metadata 2025-11-28 11:29:30 +01:00
Clément Renault
6171dcde0d Call the new searchable extractor 2025-11-28 11:29:30 +01:00
Clément Renault
04bc134324 Introduce the new searchable extractor 2025-11-28 11:29:30 +01:00
Clément Renault
8ff39d927d Enable the new settings indexer when the searchable or exact are updates 2025-11-28 11:29:30 +01:00
167 changed files with 6432 additions and 1785 deletions

View File

@@ -24,6 +24,11 @@ TBD
- [ ] If not, add the `no db change` label to your PR, and you're good to merge.
- [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining you what to do.
### Reminders when adding features
- [ ] Write unit tests using insta
- [ ] Write declarative integration tests in [workloads/tests](https://github.com/meilisearch/meilisearch/tree/main/workloads/test). Specify the routes to call and then call `cargo xtask test workloads/tests/YOUR_TEST.json --update-responses` so that responses are automatically filled.
### Reminders when modifying the API
- [ ] Update the openAPI file with utoipa:

View File

@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -66,9 +66,7 @@ jobs:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |

View File

@@ -12,9 +12,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- uses: dtolnay/rust-toolchain@1.91.1
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}

View File

@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -44,7 +44,7 @@ jobs:
exit 1
fi
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -16,7 +16,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal

View File

@@ -6,7 +6,7 @@ on:
env:
MESSAGE: |
### Hello, I'm a bot 🤖
### Hello, I'm a bot 🤖
You are receiving this message because you declared that this PR make changes to the Meilisearch database.
Depending on the nature of the change, additional actions might be required on your part. The following sections detail the additional actions depending on the nature of the change, please copy the relevant section in the description of your PR, and make sure to perform the required actions.
@@ -19,6 +19,7 @@ env:
- [ ] Detail the change to the DB format and why they are forward compatible
- [ ] Forward-compatibility: A database created before this PR and using the features touched by this PR was able to be opened by a Meilisearch produced by the code of this PR.
- [ ] Declarative test: add a [declarative test containing a dumpless upgrade](https://github.com/meilisearch/meilisearch/blob/main/TESTING.md#typical-usage)
## This PR makes breaking changes
@@ -35,8 +36,7 @@ env:
- [ ] Write the code to go from the old database to the new one
- If the change happened in milli, the upgrade function should be written and called [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/milli/src/update/upgrade/mod.rs#L24-L47)
- If the change happened in the index-scheduler, we've never done it yet, but the right place to do it should be [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/index-scheduler/src/scheduler/process_upgrade/mod.rs#L13)
- [ ] Write an integration test [here](https://github.com/meilisearch/meilisearch/blob/main/crates/meilisearch/tests/upgrade/mod.rs) ensuring you can read the old database, upgrade to the new database, and read the new database as expected
- [ ] Declarative test: add a [declarative test containing a dumpless upgrade](https://github.com/meilisearch/meilisearch/blob/main/TESTING.md#typical-usage)
jobs:
add-comment:

View File

@@ -3,7 +3,7 @@ name: Look for flaky tests
on:
workflow_dispatch:
schedule:
- cron: '0 4 * * *' # Every day at 4:00AM
- cron: "0 4 * * *" # Every day at 4:00AM
jobs:
flaky:
@@ -13,11 +13,17 @@ jobs:
image: ubuntu:22.04
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps

View File

@@ -12,9 +12,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- uses: dtolnay/rust-toolchain@1.91.1
# Run benchmarks
- name: Run the fuzzer

View File

@@ -25,7 +25,13 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.89
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v5

View File

@@ -208,8 +208,8 @@ jobs:
done
cosign sign --yes ${images}
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team
# /!\ Don't touch this without checking with engineers working on the Cloud code base on #discussion-engineering Slack channel
- name: Notify meilisearch-cloud
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
if: ${{ (github.event_name == 'push') && (matrix.edition == 'enterprise') }}
uses: peter-evans/repository-dispatch@v3
@@ -218,3 +218,14 @@ jobs:
repository: meilisearch/meilisearch-cloud
event-type: cloud-docker-build
client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }'
# /!\ Don't touch this without checking with integration team members on #discussion-integrations Slack channel
- name: Notify meilisearch-kubernetes
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event), or if not stable
if: ${{ github.event_name == 'push' && matrix.edition == 'community' && steps.check-tag-format.outputs.stable == 'true' }}
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.MEILI_BOT_GH_PAT }}
repository: meilisearch/meilisearch-kubernetes
event-type: meilisearch-release
client-payload: '{ "version": "${{ github.ref_name }}" }'

View File

@@ -76,7 +76,7 @@ jobs:
needs: check-version
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- uses: dtolnay/rust-toolchain@1.91.1
- name: Build
run: cargo build --release --locked ${{ matrix.feature-flag }} ${{ matrix.extra-args }}
# No need to upload binaries for dry run (cron or workflow_dispatch)

View File

@@ -25,14 +25,18 @@ jobs:
- uses: actions/checkout@v5
- name: Define the Docker image we need to use
id: define-image
env:
EVENT_NAME: ${{ github.event_name }}
DOCKER_IMAGE_INPUT: ${{ github.event.inputs.docker_image }}
run: |
event=${{ github.event_name }}
echo "docker-image=nightly" >> $GITHUB_OUTPUT
if [[ $event == 'workflow_dispatch' ]]; then
echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
if [[ "$EVENT_NAME" == 'workflow_dispatch' ]]; then
echo "docker-image=$DOCKER_IMAGE_INPUT" >> $GITHUB_OUTPUT
fi
- name: Docker image is ${{ steps.define-image.outputs.docker-image }}
run: echo "Docker image is ${{ steps.define-image.outputs.docker-image }}"
env:
DOCKER_IMAGE: ${{ steps.define-image.outputs.docker-image }}
run: echo "Docker image is $DOCKER_IMAGE"
##########
## SDKs ##

View File

@@ -19,31 +19,36 @@ jobs:
runs-on: ${{ matrix.runner }}
strategy:
matrix:
runner: [ubuntu-24.04, ubuntu-24.04-arm]
runner: [ubuntu-22.04, ubuntu-22.04-arm]
features: ["", "--features enterprise"]
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
- name: check free space before
run: df -h
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: check free space after
run: df -h
- name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.89
uses: dtolnay/rust-toolchain@1.91.1
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo check without any default features
with:
key: ${{ matrix.features }}
- name: Run cargo build without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
args: --locked --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all ${{ matrix.features }}
args: --locked --all ${{ matrix.features }}
test-others:
name: Tests on ${{ matrix.os }}
@@ -53,53 +58,56 @@ jobs:
matrix:
os: [macos-14, windows-2022]
features: ["", "--features enterprise"]
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v5
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- uses: dtolnay/rust-toolchain@1.89
- name: Run cargo check without any default features
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run cargo build without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
args: --locked --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all ${{ matrix.features }}
args: --locked --all ${{ matrix.features }}
test-all-features:
name: Tests almost all features
runs-on: ubuntu-latest
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
runs-on: ubuntu-22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.89
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
cargo build --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
- name: Run cargo test with almost all features
run: |
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
cargo test --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
ollama-ubuntu:
name: Test with Ollama
runs-on: ubuntu-latest
strategy:
matrix:
features: ["", "--features enterprise"]
runs-on: ubuntu-22.04
env:
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sudo -E sh
@@ -123,21 +131,21 @@ jobs:
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all --features test-ollama ollama ${{ matrix.features }}
args: --locked -p meilisearch --features test-ollama ollama
test-disabled-tokenization:
name: Test disabled tokenization
runs-on: ubuntu-latest
container:
image: ubuntu:22.04
runs-on: ubuntu-22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.89
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -148,35 +156,39 @@ jobs:
run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
build:
name: Build in release
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Build
run: cargo build --release --locked --target x86_64-unknown-linux-gnu
clippy:
name: Run Clippy
runs-on: ubuntu-22.04
strategy:
matrix:
features: ["", "--features enterprise"]
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
with:
command: test
args: --locked --all ${{ matrix.features }}
clippy:
name: Run Clippy
runs-on: ubuntu-latest
strategy:
matrix:
features: ["", "--features enterprise"]
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
@@ -188,14 +200,17 @@ jobs:
fmt:
name: Run Rustfmt
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
with:
profile: minimal
toolchain: nightly-2024-07-09
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
@@ -206,3 +221,23 @@ jobs:
run: |
echo -ne "\n" > crates/benchmarks/benches/datasets_paths.rs
cargo fmt --all -- --check
declarative-tests:
name: Run declarative tests
runs-on: ubuntu-22.04-arm
permissions:
contents: read
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run declarative tests
run: |
cargo xtask test workloads/tests/*.json

View File

@@ -18,9 +18,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.91.1
- name: Install sd
run: cargo install sd
- name: Update Cargo.toml file

View File

@@ -124,6 +124,7 @@ They are JSON files with the following structure (comments are not actually supp
{
// Name of the workload. Must be unique to the workload, as it will be used to group results on the dashboard.
"name": "hackernews.ndjson_1M,no-threads",
"type": "bench",
// Number of consecutive runs of the commands that should be performed.
// Each run uses a fresh instance of Meilisearch and a fresh database.
// Each run produces its own report file.

673
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.28.1"
version = "1.29.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

326
TESTING.md Normal file
View File

@@ -0,0 +1,326 @@
# Declarative tests
Declarative tests ensure that Meilisearch features remain stable across versions.
While we already have unit tests, those are run against **temporary databases** that are created fresh each time and therefore never risk corruption.
Declarative tests instead **simulate the lifetime of a database**: they chain together commands and requests to change the binary, verifying that database state and API responses remain consistent.
## Basic example
```jsonc
{
"type": "test",
"name": "api-keys",
"binary": { // the first command will run on the binary following this specification.
"source": "release", // get the binary as a release from GitHub
"version": "1.19.0", // version to fetch
"edition": "community" // edition to fetch
},
"commands": []
}
```
This example defines a no-op test (it does nothing).
If the file is saved at `workloads/tests/example.json`, you can run it with:
```bash
cargo xtask test workloads/tests/example.json
```
## Commands
Commands represent API requests sent to Meilisearch endpoints during a test.
They are executed sequentially, and their responses can be validated to ensure consistent behavior across upgrades.
```jsonc
{
"route": "keys",
"method": "POST",
"body": {
"inline": {
"actions": [
"search",
"documents.add"
],
"description": "Test API Key",
"expiresAt": null,
"indexes": [ "movies" ]
}
}
}
```
This command issues a `POST /keys` request, creating an API key with permissions to search and add documents in the `movies` index.
### Using assets in commands
To keep tests concise and reusable, you can define **assets** at the root of the workload file.
Assets are external data sources (such as datasets) that are cached between runs, making tests faster and easier to read.
```jsonc
{
"type": "test",
"name": "movies",
"binary": {
"source": "release",
"version": "1.19.0",
"edition": "community"
},
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
}
}
]
}
```
In this example:
- The `movies.json` dataset is defined as an asset, pointing to a remote URL.
- The SHA-256 checksum ensures integrity.
- The `POST /indexes/movies/documents` command uses this asset as the request body.
This makes the test much cleaner than inlining a large dataset directly into the command.
For asset handling, please refer to the [declarative benchmarks documentation](/BENCHMARKS.md#adding-new-assets).
### Asserting responses
Commands can specify both the **expected status code** and the **expected response body**.
```jsonc
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
},
"expectedStatus": 202,
"expectedResponse": {
"enqueuedAt": "[timestamp]", // Set to a bracketed string to ignore the value
"indexUid": "movies",
"status": "enqueued",
"taskUid": 1,
"type": "documentAdditionOrUpdate"
},
"synchronous": "WaitForTask"
}
```
Manually writing `expectedResponse` fields can be tedious.
Instead, you can let the test runner populate them automatically:
```bash
# Run the workload to populate expected fields. Only adds the missing ones, doesn't change existing data
cargo xtask test workloads/tests/example.json --add-missing-responses
# OR
# Run the workload to populate expected fields. Updates all fields including existing ones
cargo xtask test workloads/tests/example.json --update-responses
```
This workflow is recommended:
1. Write the test without expected fields.
2. Run it with `--add-missing-responses` to capture the actual responses.
3. Review and commit the generated expectations.
## Changing binary
It is possible to insert an instruction to change the current Meilisearch instance from one binary specification to another during a test.
When executed, such an instruction will:
1. Stop the current Meilisearch instance.
2. Fetch the binary specified by the instruction.
3. Restart the server with the specified binary on the same database.
```jsonc
{
"type": "test",
"name": "movies",
"binary": {
"source": "release",
"version": "1.19.0", // start with version v1.19.0
"edition": "community"
},
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"commands": [
// setup some data
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
}
},
// switch binary to v1.24.0
{
"binary": {
"source": "release",
"version": "1.24.0",
"edition": "community"
}
}
]
}
```
### Typical Usage
In most cases, the change binary instruction will be used to update a database.
- **Set up** some data using commands on an older version.
- **Upgrade** to the latest version.
- **Assert** that the data and API behavior remain correct after the upgrade.
To properly test the dumpless upgrade, one should typically:
1. Open the database without processing the update task: Use a `binary` instruction to switch to the desired version, passing `--experimental-dumpless-upgrade` and `--experimental-max-number-of-batched-tasks=0` as extra CLI arguments
2. Check that the search, stats and task queue still work.
3. Open the database and process the update task: Use a `binary` instruction to switch to the desired version, passing `--experimental-dumpless-upgrade` as the extra CLI argument. Use a `health` command to wait for the upgrade task to finish.
4. Check that the indexing, search, stats, and task queue still work.
```jsonc
{
"type": "test",
"name": "movies",
"binary": {
"source": "release",
"version": "1.12.0",
"edition": "community"
},
"commands": [
// 0. Run commands to populate the database
{
// ..
},
// 1. Open the database with new MS without processing the update task
{
"binary": {
"source": "build", // build the binary from the sources in the current git repository
"edition": "community",
"extraCliArgs": [
"--experimental-dumpless-upgrade", // allows to open with a newer MS
"--experimental-max-number-of-batched-tasks=0" // prevent processing of the update task
]
}
},
// 2. Check the search etc.
{
// ..
},
// 3. Open the database with new MS and processing the update task
{
"binary": {
"source": "build", // build the binary from the sources in the current git repository
"edition": "community",
"extraCliArgs": [
"--experimental-dumpless-upgrade" // allows to open with a newer MS
// no `--experimental-max-number-of-batched-tasks=0`
]
}
},
// 4. Check the indexing, search, etc.
{
// ..
}
]
}
```
This ensures backward compatibility: databases created with older Meilisearch versions should remain functional and consistent after an upgrade.
## Variables
Sometimes a command needs to use a value returned by a **previous response**.
These values can be captured and reused using the register field.
```jsonc
{
"route": "keys",
"method": "POST",
"body": {
"inline": {
"actions": [
"search",
"documents.add"
],
"description": "Test API Key",
"expiresAt": null,
"indexes": [ "movies" ]
}
},
"expectedResponse": {
"key": "c6f64630bad2996b1f675007c8800168e14adf5d6a7bb1a400a6d2b158050eaf",
// ...
},
"register": {
"key": "/key"
},
"synchronous": "WaitForResponse"
}
```
The `register` field captures the value at the JSON path `/key` from the response.
Paths follow the **JavaScript Object Notation Pointer (RFC 6901)** format.
Registered variables are available for all subsequent commands.
Registered variables can be referenced by wrapping their name in double curly braces:
In the route/path:
```jsonc
{
"route": "tasks/{{ task_id }}",
"method": "GET"
}
```
In the request body:
```jsonc
{
"route": "indexes/movies/documents",
"method": "PATCH",
"body": {
"inline": {
"id": "{{ document_id }}",
"overview": "Shazam turns evil and the world is in danger.",
}
}
}
```
Or they can be referenced by their name (**without curly braces**) as an API key:
```jsonc
{
"route": "indexes/movies/documents",
"method": "POST",
"body": { /* ... */ },
"apiKeyVariable": "key" // The **content** of the key variable will be used as an API key
}
```

View File

@@ -21,6 +21,10 @@ use roaring::RoaringBitmap;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn no_cancel() -> bool {
false
}
const BENCHMARK_ITERATION: usize = 10;
fn setup_dir(path: impl AsRef<Path>) {
@@ -65,7 +69,7 @@ fn setup_settings<'t>(
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
builder.set_sortable_fields(sortable_fields);
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
builder.execute(&no_cancel, &Progress::default(), Default::default()).unwrap();
}
fn setup_index_with_settings(
@@ -152,7 +156,7 @@ fn indexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -168,7 +172,7 @@ fn indexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -220,7 +224,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -236,7 +240,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -266,7 +270,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -282,7 +286,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -336,7 +340,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -352,7 +356,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -414,7 +418,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -430,7 +434,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -460,7 +464,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -476,7 +480,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -502,7 +506,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -518,7 +522,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -571,7 +575,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -587,7 +591,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -639,7 +643,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -655,7 +659,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -707,7 +711,7 @@ fn indexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -723,7 +727,7 @@ fn indexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -774,7 +778,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -790,7 +794,7 @@ fn reindexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -820,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -836,7 +840,7 @@ fn reindexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -889,7 +893,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -905,7 +909,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -967,7 +971,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -983,7 +987,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1014,7 +1018,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1030,7 +1034,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1057,7 +1061,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1073,7 +1077,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1125,7 +1129,7 @@ fn indexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1141,7 +1145,7 @@ fn indexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1192,7 +1196,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1208,7 +1212,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1238,7 +1242,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1254,7 +1258,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1307,7 +1311,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1323,7 +1327,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1372,7 +1376,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
Some(primary_key),
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1422,7 +1426,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1438,7 +1442,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1468,7 +1472,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1484,7 +1488,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1510,7 +1514,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1526,7 +1530,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1601,7 +1605,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1617,7 +1621,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1693,7 +1697,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1709,7 +1713,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1777,7 +1781,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1793,7 +1797,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1845,7 +1849,7 @@ fn indexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1861,7 +1865,7 @@ fn indexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1912,7 +1916,7 @@ fn reindexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1928,7 +1932,7 @@ fn reindexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1958,7 +1962,7 @@ fn reindexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1974,7 +1978,7 @@ fn reindexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2027,7 +2031,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2043,7 +2047,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)

View File

@@ -15,4 +15,4 @@ time = { version = "0.3.44", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.100"
vergen-git2 = "1.0.7"
vergen-gitcl = "1.0.8"

View File

@@ -15,7 +15,7 @@ fn emit_git_variables() -> anyhow::Result<()> {
// Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
// in the corresponding GitHub workflow (publish_docker.yml).
// This is due to the Dockerfile building the binary outside of the git directory.
let mut builder = vergen_git2::Git2Builder::default();
let mut builder = vergen_gitcl::GitclBuilder::default();
builder.branch(true);
builder.commit_timestamp(true);
@@ -25,5 +25,5 @@ fn emit_git_variables() -> anyhow::Result<()> {
let git2 = builder.build()?;
vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
vergen_gitcl::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
}

View File

@@ -0,0 +1,6 @@
use build_info::BuildInfo;
fn main() {
let info = BuildInfo::from_build();
dbg!(info);
}

View File

@@ -107,19 +107,14 @@ impl Settings<Unchecked> {
}
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Default, Debug, Clone, PartialEq)]
pub enum Setting<T> {
Set(T),
Reset,
#[default]
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub const fn is_not_set(&self) -> bool {
matches!(self, Self::NotSet)

View File

@@ -161,19 +161,14 @@ pub struct Facets {
pub min_level_size: Option<NonZeroUsize>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub enum Setting<T> {
Set(T),
Reset,
#[default]
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub fn map<U, F>(self, f: F) -> Setting<U>
where

View File

@@ -1,9 +1,7 @@
use std::fmt::{self, Display, Formatter};
use std::marker::PhantomData;
use std::str::FromStr;
use serde::de::Visitor;
use serde::{Deserialize, Deserializer};
use serde::Deserialize;
use uuid::Uuid;
use super::settings::{Settings, Unchecked};
@@ -82,59 +80,3 @@ impl Display for IndexUidFormatError {
}
impl std::error::Error for IndexUidFormatError {}
/// A type that tries to match either a star (*) or
/// any other thing that implements `FromStr`.
#[derive(Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum StarOr<T> {
Star,
Other(T),
}
impl<'de, T, E> Deserialize<'de> for StarOr<T>
where
T: FromStr<Err = E>,
E: Display,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
/// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag.
/// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to
/// deserialize everything as a `StarOr::Other`, including "*".
/// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is
/// not supported on untagged enums.
struct StarOrVisitor<T>(PhantomData<T>);
impl<T, FE> Visitor<'_> for StarOrVisitor<T>
where
T: FromStr<Err = FE>,
FE: Display,
{
type Value = StarOr<T>;
fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
formatter.write_str("a string")
}
fn visit_str<SE>(self, v: &str) -> Result<Self::Value, SE>
where
SE: serde::de::Error,
{
match v {
"*" => Ok(StarOr::Star),
v => {
let other = FromStr::from_str(v).map_err(|e: T::Err| {
SE::custom(format!("Invalid `other` value: {}", e))
})?;
Ok(StarOr::Other(other))
}
}
}
}
deserializer.deserialize_str(StarOrVisitor(PhantomData))
}
}

View File

@@ -192,19 +192,14 @@ pub struct Facets {
pub min_level_size: Option<NonZeroUsize>,
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)]
pub enum Setting<T> {
Set(T),
Reset,
#[default]
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub fn set(self) -> Option<T> {
match self {

View File

@@ -47,20 +47,15 @@ pub struct Settings<T> {
pub _kind: PhantomData<T>,
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
#[derive(Default, Debug, Clone, PartialEq, Eq, Copy)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum Setting<T> {
Set(T),
Reset,
#[default]
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub fn set(self) -> Option<T> {
match self {

View File

@@ -322,7 +322,7 @@ impl From<Task> for TaskView {
_ => None,
});
let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts));
let duration = finished_at.zip(started_at).map(|(tf, ts)| tf - ts);
Self {
uid: id,

View File

@@ -4,9 +4,9 @@ use std::fmt::Write;
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Kind, Status, Task};
use meilisearch_types::versioning;
use meilisearch_types::versioning::{self, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use roaring::RoaringBitmap;
use crate::index_mapper::IndexMapper;
@@ -187,7 +187,7 @@ pub fn snapshot_all_batches(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Batch>>)
pub fn snapshot_batches_to_tasks_mappings(
rtxn: &RoTxn,
db: Database<BEU32, CboRoaringBitmapCodec>,
db: Database<BEU32, DeCboRoaringBitmapCodec>,
) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
@@ -198,7 +198,7 @@ pub fn snapshot_batches_to_tasks_mappings(
snap
}
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, DeCboRoaringBitmapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
@@ -320,7 +320,11 @@ fn snapshot_details(d: &Details) -> String {
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
}
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")
if to == &(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) {
format!("{{ from: {from:?}, to: [current version] }}")
} else {
format!("{{ from: {from:?}, to: {to:?} }}")
}
}
Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
@@ -400,7 +404,21 @@ pub fn snapshot_batch(batch: &Batch) -> String {
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
let details = if let Some(upgrade_to) = &details.upgrade_to {
if upgrade_to.as_str()
== format!("v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}").as_str()
{
let mut details = details.clone();
details.upgrade_to = Some("[current version]".into());
serde_json::to_string(&details).unwrap()
} else {
serde_json::to_string(details).unwrap()
}
} else {
serde_json::to_string(details).unwrap()
};
snap.push_str(&format!("details: {details}, "));
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
if !embedder_stats.skip_serializing() {
snap.push_str(&format!(

View File

@@ -4,7 +4,7 @@ use std::ops::{Bound, RangeBounds};
use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
@@ -42,11 +42,11 @@ pub struct BatchQueue {
/// Store the batches associated to an index.
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
/// Store the batches containing tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
pub(crate) enqueued_at: Database<BEI128, DeCboRoaringBitmapCodec>,
/// Store the batches containing finished tasks started at a specific date
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
pub(crate) started_at: Database<BEI128, DeCboRoaringBitmapCodec>,
/// Store the batches containing tasks finished at a specific date
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
pub(crate) finished_at: Database<BEI128, DeCboRoaringBitmapCodec>,
}
impl BatchQueue {

View File

@@ -14,7 +14,7 @@ use std::time::Duration;
use file_store::FileStore;
use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use time::format_description::well_known::Rfc3339;
@@ -130,7 +130,7 @@ pub struct Queue {
pub(crate) batches: batches::BatchQueue,
/// Matches a batch id with the associated task ids.
pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>,
pub(crate) batch_to_tasks_mapping: Database<BEU32, DeCboRoaringBitmapCodec>,
/// The list of files referenced by the tasks.
pub(crate) file_store: FileStore,

View File

@@ -2,7 +2,7 @@ use std::ops::{Bound, RangeBounds};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status, Task};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
@@ -43,11 +43,11 @@ pub struct TaskQueue {
/// Store the tasks that were canceled by a task uid
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
/// Store the task ids of tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
pub(crate) enqueued_at: Database<BEI128, DeCboRoaringBitmapCodec>,
/// Store the task ids of finished tasks which started being processed at a specific date
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
pub(crate) started_at: Database<BEI128, DeCboRoaringBitmapCodec>,
/// Store the task ids of tasks which finished at a specific date
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
pub(crate) finished_at: Database<BEI128, DeCboRoaringBitmapCodec>,
}
impl TaskQueue {

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -1,7 +1,7 @@
use anyhow::bail;
use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use meilisearch_types::versioning;
use time::OffsetDateTime;
use tracing::info;
@@ -9,83 +9,82 @@ use crate::queue::TaskQueue;
use crate::versioning::Versioning;
trait UpgradeIndexScheduler {
fn upgrade(
&self,
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
original: (u32, u32, u32),
) -> anyhow::Result<()>;
fn target_version(&self) -> (u32, u32, u32);
fn upgrade(&self, env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> anyhow::Result<()>;
/// Whether the migration should be applied, depending on the initial version of the index scheduler before
/// any migration was applied
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
/// A progress-centric description of the migration
fn description(&self) -> &'static str;
}
/// Upgrade the index scheduler to the binary version.
///
/// # Warning
///
/// The current implementation uses a single wtxn to the index scheduler for the whole duration of the upgrade.
/// If migrations start taking take a long time, it might prevent tasks from being registered.
/// If this issue manifests, then it can be mitigated by adding a `fn target_version` to `UpgradeIndexScheduler`,
/// to be able to write intermediate versions and drop the wtxn between applying migrations.
pub fn upgrade_index_scheduler(
env: &Env<WithoutTls>,
versioning: &Versioning,
from: (u32, u32, u32),
to: (u32, u32, u32),
initial_version: (u32, u32, u32),
) -> anyhow::Result<()> {
let current_major = to.0;
let current_minor = to.1;
let current_patch = to.2;
let target_major: u32 = versioning::VERSION_MAJOR;
let target_minor: u32 = versioning::VERSION_MINOR;
let target_patch: u32 = versioning::VERSION_PATCH;
let target_version = (target_major, target_minor, target_patch);
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
// This is the last upgrade function, it will be called when the index is up to date.
// any other upgrade function should be added before this one.
&ToCurrentNoOp {},
];
let start = match from {
(1, 12, _) => 0,
(1, 13, _) => 0,
(1, 14, _) => 0,
(1, 15, _) => 0,
(1, 16, _) => 0,
(1, 17, _) => 0,
(1, 18, _) => 0,
(1, 19, _) => 0,
(1, 20, _) => 0,
(1, 21, _) => 0,
(1, 22, _) => 0,
(1, 23, _) => 0,
(1, 24, _) => 0,
(1, 25, _) => 0,
(1, 26, _) => 0,
(1, 27, _) => 0,
(1, 28, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)
|| (major == current_major && minor == current_minor && patch > current_patch)
{
bail!(
"Database version {major}.{minor}.{patch} is higher than the Meilisearch version {current_major}.{current_minor}.{current_patch}. Downgrade is not supported",
);
} else if major < 1 || (major == current_major && minor < 12) {
bail!(
"Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{current_major}.{current_minor}.{current_patch}",
);
} else {
bail!("Unknown database version: v{major}.{minor}.{patch}");
}
}
};
info!("Upgrading the task queue");
let mut local_from = from;
for upgrade in upgrade_functions[start..].iter() {
let target = upgrade.target_version();
info!(
"Upgrading from v{}.{}.{} to v{}.{}.{}",
local_from.0, local_from.1, local_from.2, target.0, target.1, target.2
);
let mut wtxn = env.write_txn()?;
upgrade.upgrade(env, &mut wtxn, local_from)?;
versioning.set_version(&mut wtxn, target)?;
wtxn.commit()?;
local_from = target;
if initial_version == target_version {
return Ok(());
}
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
// List all upgrade functions to apply in order here.
];
let (initial_major, initial_minor, initial_patch) = initial_version;
if initial_version > target_version {
bail!(
"Database version {initial_major}.{initial_minor}.{initial_patch} is higher than the Meilisearch version {target_major}.{target_minor}.{target_patch}. Downgrade is not supported",
);
}
if initial_version < (1, 12, 0) {
bail!(
"Database version {initial_major}.{initial_minor}.{initial_patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{initial_major}.{initial_minor}.{initial_patch} and import it in the v{target_major}.{target_minor}.{target_patch}",
);
}
info!("Upgrading the task queue");
let mut wtxn = env.write_txn()?;
let migration_count = upgrade_functions.len();
for (migration_index, upgrade) in upgrade_functions.iter().enumerate() {
if upgrade.must_upgrade(initial_version) {
info!(
"[{migration_index}/{migration_count}]Applying migration: {}",
upgrade.description()
);
upgrade.upgrade(env, &mut wtxn)?;
info!(
"[{}/{migration_count}]Migration applied: {}",
migration_index + 1,
upgrade.description()
)
} else {
info!(
"[{migration_index}/{migration_count}]Skipping unnecessary migration: {}",
upgrade.description()
)
}
}
versioning.set_version(&mut wtxn, target_version)?;
info!("Task queue upgraded, spawning the upgrade database task");
let queue = TaskQueue::new(env, &mut wtxn)?;
let uid = queue.next_task_id(&wtxn)?;
queue.register(
@@ -98,9 +97,9 @@ pub fn upgrade_index_scheduler(
finished_at: None,
error: None,
canceled_by: None,
details: Some(Details::UpgradeDatabase { from, to }),
details: Some(Details::UpgradeDatabase { from: initial_version, to: target_version }),
status: Status::Enqueued,
kind: KindWithContent::UpgradeDatabase { from },
kind: KindWithContent::UpgradeDatabase { from: initial_version },
network: None,
custom_metadata: None,
},
@@ -109,21 +108,3 @@ pub fn upgrade_index_scheduler(
Ok(())
}
#[allow(non_camel_case_types)]
struct ToCurrentNoOp {}
impl UpgradeIndexScheduler for ToCurrentNoOp {
fn upgrade(
&self,
_env: &Env<WithoutTls>,
_wtxn: &mut RwTxn,
_original: (u32, u32, u32),
) -> anyhow::Result<()> {
Ok(())
}
fn target_version(&self) -> (u32, u32, u32) {
(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
}

View File

@@ -6,7 +6,7 @@ use std::sync::Arc;
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::CboRoaringBitmapCodec;
use meilisearch_types::milli::DeCboRoaringBitmapCodec;
use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{
BatchStopReason, Details, IndexSwap, Kind, KindWithContent, Status,
@@ -161,7 +161,7 @@ impl ProcessingBatch {
pub(crate) fn insert_task_datetime(
wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>,
database: Database<BEI128, DeCboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
@@ -174,7 +174,7 @@ pub(crate) fn insert_task_datetime(
pub(crate) fn remove_task_datetime(
wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>,
database: Database<BEI128, DeCboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
@@ -193,7 +193,7 @@ pub(crate) fn remove_task_datetime(
pub(crate) fn remove_n_tasks_datetime_earlier_than(
wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>,
database: Database<BEI128, DeCboRoaringBitmapCodec>,
earlier_than: OffsetDateTime,
mut count: usize,
task_id: TaskId,
@@ -221,7 +221,7 @@ pub(crate) fn remove_n_tasks_datetime_earlier_than(
pub(crate) fn keep_ids_within_datetimes(
rtxn: &RoTxn,
ids: &mut RoaringBitmap,
database: Database<BEI128, CboRoaringBitmapCodec>,
database: Database<BEI128, DeCboRoaringBitmapCodec>,
after: Option<OffsetDateTime>,
before: Option<OffsetDateTime>,
) -> Result<()> {

View File

@@ -64,14 +64,7 @@ impl Versioning {
};
wtxn.commit()?;
let bin_major: u32 = versioning::VERSION_MAJOR;
let bin_minor: u32 = versioning::VERSION_MINOR;
let bin_patch: u32 = versioning::VERSION_PATCH;
let to = (bin_major, bin_minor, bin_patch);
if from != to {
upgrade_index_scheduler(env, &this, from, to)?;
}
upgrade_index_scheduler(env, &this, from)?;
// Once we reach this point it means the upgrade process, if there was one is entirely finished
// we can safely say we reached the latest version of the index scheduler

View File

@@ -1,6 +1,7 @@
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {

View File

@@ -1,7 +1,7 @@
use std::any::TypeId;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::sync::Arc;
use std::time::{Duration, Instant};
@@ -300,6 +300,7 @@ impl Infos {
max_indexing_memory,
max_indexing_threads,
skip_index_budget: _,
experimental_disable_delta_encoding: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,
@@ -344,14 +345,14 @@ impl Infos {
experimental_no_edition_2024_for_dumps,
experimental_vector_store_setting: vector_store_setting,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
db_path: db_path != Path::new("./data.ms"),
import_dump: import_dump.is_some(),
dump_dir: dump_dir != PathBuf::from("dumps/"),
dump_dir: dump_dir != Path::new("dumps/"),
ignore_missing_dump,
ignore_dump_if_db_exists,
import_snapshot: import_snapshot.is_some(),
schedule_snapshot,
snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
snapshot_dir: snapshot_dir != Path::new("snapshots/"),
uses_s3_snapshots: s3_snapshot_options.is_some(),
ignore_missing_snapshot,
ignore_snapshot_if_db_exists,

View File

@@ -21,6 +21,7 @@ use meilisearch::{
LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use meilisearch_types::milli::heed_codec::DELTA_ENCODING_STATUS;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
@@ -95,6 +96,14 @@ async fn main() -> anyhow::Result<()> {
async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
// Disables the delta encoding of bitmaps as soon as possible
if opt.indexer_options.experimental_disable_delta_encoding {
DELTA_ENCODING_STATUS.set_to_disabled()
} else {
DELTA_ENCODING_STATUS.set_to_enabled()
}
.expect("the delta-encoding status to be set only once");
std::panic::set_hook(Box::new(on_panic));
anyhow::ensure!(

View File

@@ -60,6 +60,7 @@ const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
const MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING: &str = "MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
@@ -845,6 +846,14 @@ pub struct IndexerOpts {
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
#[serde(default)]
pub experimental_no_edition_2024_for_facet_post_processing: bool,
/// Experimental disable delta-encoding for bitmaps. For more information,
/// see: <https://github.com/orgs/meilisearch/discussions/875>
///
/// Enables the experimental disable delta-encoding for bitmaps feature.
#[clap(long, env = MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING)]
#[serde(default)]
pub experimental_disable_delta_encoding: bool,
}
impl IndexerOpts {
@@ -858,6 +867,7 @@ impl IndexerOpts {
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing,
experimental_disable_delta_encoding,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
@@ -895,6 +905,12 @@ impl IndexerOpts {
experimental_no_edition_2024_for_facet_post_processing.to_string(),
);
}
if experimental_disable_delta_encoding {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING,
experimental_disable_delta_encoding.to_string(),
);
}
}
}
@@ -910,6 +926,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing,
experimental_disable_delta_encoding: _, // managed in try_main
} = other;
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
@@ -1245,7 +1262,7 @@ where
T: AsRef<OsStr>,
{
if let Err(VarError::NotPresent) = std::env::var(key) {
std::env::set_var(key, value);
unsafe { std::env::set_var(key, value) }
}
}

View File

@@ -183,7 +183,11 @@ pub async fn get_metrics(
crate::metrics::MEILISEARCH_LAST_FINISHED_BATCHES_PROGRESS_TRACE_MS.reset();
let (batches, _total) = index_scheduler.get_batches_from_authorized_indexes(
// Fetch the finished batches...
&Query { statuses: Some(vec![Status::Succeeded, Status::Failed]), ..Query::default() },
&Query {
statuses: Some(vec![Status::Succeeded, Status::Failed]),
limit: Some(1),
..Query::default()
},
auth_filters,
)?;
// ...and get the last batch only.

View File

@@ -789,11 +789,12 @@ impl TryFrom<Value> for ExternalDocumentId {
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema, Serialize)]
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema, Serialize)]
#[deserr(rename_all = camelCase)]
#[serde(rename_all = "camelCase")]
pub enum MatchingStrategy {
/// Remove query words from last to first
#[default]
Last,
/// All query words are mandatory
All,
@@ -801,12 +802,6 @@ pub enum MatchingStrategy {
Frequency,
}
impl Default for MatchingStrategy {
fn default() -> Self {
Self::Last
}
}
impl From<MatchingStrategy> for TermsMatchingStrategy {
fn from(other: MatchingStrategy) -> Self {
match other {

View File

@@ -187,7 +187,7 @@ macro_rules! compute_forbidden_search {
#[actix_rt::test]
async fn search_authorized_simple_token() {
let tenant_tokens = vec![
let tenant_tokens = [
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
@@ -239,7 +239,7 @@ async fn search_authorized_simple_token() {
#[actix_rt::test]
async fn search_authorized_filter_token() {
let tenant_tokens = vec![
let tenant_tokens = [
hashmap! {
"searchRules" => json!({"*": {"filter": "color = blue"}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
@@ -292,7 +292,7 @@ async fn search_authorized_filter_token() {
#[actix_rt::test]
async fn filter_search_authorized_filter_token() {
let tenant_tokens = vec![
let tenant_tokens = [
hashmap! {
"searchRules" => json!({"*": {"filter": "color = blue"}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
@@ -353,7 +353,7 @@ async fn filter_search_authorized_filter_token() {
/// Tests that those Tenant Token are incompatible with the REFUSED_KEYS defined above.
#[actix_rt::test]
async fn error_search_token_forbidden_parent_key() {
let tenant_tokens = vec![
let tenant_tokens = [
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
@@ -389,7 +389,7 @@ async fn error_search_token_forbidden_parent_key() {
#[actix_rt::test]
async fn error_search_forbidden_token() {
let tenant_tokens = vec![
let tenant_tokens = [
// bad index
hashmap! {
"searchRules" => json!({"products": {}}),

View File

@@ -680,7 +680,7 @@ async fn multi_search_authorized_simple_token() {
#[actix_rt::test]
async fn single_search_authorized_filter_token() {
let tenant_tokens = vec![
let tenant_tokens = [
hashmap! {
"searchRules" => json!({"*": {"filter": "color = blue"}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
@@ -733,7 +733,7 @@ async fn single_search_authorized_filter_token() {
#[actix_rt::test]
async fn multi_search_authorized_filter_token() {
let both_tenant_tokens = vec![
let both_tenant_tokens = [
hashmap! {
"searchRules" => json!({"sales": {"filter": "color = blue"}, "products": {"filter": "doggos.age <= 5"}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
@@ -842,7 +842,7 @@ async fn filter_single_search_authorized_filter_token() {
#[actix_rt::test]
async fn filter_multi_search_authorized_filter_token() {
let tenant_tokens = vec![
let tenant_tokens = [
hashmap! {
"searchRules" => json!({"sales": {"filter": "color = blue"}, "products": {"filter": "doggos.age <= 5"}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
@@ -900,7 +900,7 @@ async fn filter_multi_search_authorized_filter_token() {
/// Tests that those Tenant Token are incompatible with the REFUSED_KEYS defined above.
#[actix_rt::test]
async fn error_single_search_token_forbidden_parent_key() {
let tenant_tokens = vec![
let tenant_tokens = [
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
@@ -941,7 +941,7 @@ async fn error_single_search_token_forbidden_parent_key() {
/// Tests that those Tenant Token are incompatible with the REFUSED_KEYS defined above.
#[actix_rt::test]
async fn error_multi_search_token_forbidden_parent_key() {
let tenant_tokens = vec![
let tenant_tokens = [
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())

View File

@@ -43,9 +43,9 @@ impl Server<Owned> {
let dir = TempDir::new().unwrap();
if cfg!(windows) {
std::env::set_var("TMP", TEST_TEMP_DIR.path());
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
} else {
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
}
let options = default_settings(dir.path());
@@ -58,9 +58,9 @@ impl Server<Owned> {
pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self {
if cfg!(windows) {
std::env::set_var("TMP", TEST_TEMP_DIR.path());
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
} else {
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
}
options.master_key = Some("MASTER_KEY".to_string());
@@ -215,9 +215,9 @@ impl Server<Shared> {
let dir = TempDir::new().unwrap();
if cfg!(windows) {
std::env::set_var("TMP", TEST_TEMP_DIR.path());
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
} else {
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
}
let options = default_settings(dir.path());
@@ -508,6 +508,8 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
experimental_no_edition_2024_for_dumps: false,
experimental_no_edition_2024_for_prefix_post_processing: false,
experimental_no_edition_2024_for_facet_post_processing: false,
// It has no effect to set the delta encoding here as the toggle is done in try_main
experimental_disable_delta_encoding: false,
},
experimental_enable_metrics: false,
..Parser::parse_from(None as Option<&str>)

View File

@@ -197,7 +197,7 @@ test_setting_routes!(
{
setting: vector_store,
update_verb: patch,
default_value: null
default_value: "experimental"
},
);

View File

@@ -2,6 +2,7 @@ mod chat;
mod distinct;
mod errors;
mod get_settings;
mod parent_seachable_fields;
mod prefix_search_settings;
mod proximity_settings;
mod tokenizer_customization;

View File

@@ -0,0 +1,114 @@
use meili_snap::{json_string, snapshot};
use once_cell::sync::Lazy;
use crate::common::Server;
use crate::json;
static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
json!([
{
"id": 1,
"meta": {
"title": "Soup of the day",
"description": "many the fish",
}
},
{
"id": 2,
"meta": {
"title": "Soup of day",
"description": "many the lazy fish",
}
},
{
"id": 3,
"meta": {
"title": "the Soup of day",
"description": "many the fish",
}
},
])
});
#[actix_rt::test]
async fn nested_field_becomes_searchable() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, code) = index
.update_settings(json!({
"searchableAttributes": ["meta.title"]
}))
.await;
assert_eq!("202", code.as_str(), "{response:?}");
server.wait_task(response.uid()).await.succeeded();
// We expect no documents when searching for
// a nested non-searchable field
index
.search(json!({"q": "many fish"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"[]"###);
})
.await;
let (response, code) = index
.update_settings(json!({
"searchableAttributes": ["meta.title", "meta.description"]
}))
.await;
assert_eq!("202", code.as_str(), "{response:?}");
server.wait_task(response.uid()).await.succeeded();
// We expect all the documents when the nested field becomes searchable
index
.search(json!({"q": "many fish"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"meta": {
"title": "Soup of the day",
"description": "many the fish"
}
},
{
"id": 3,
"meta": {
"title": "the Soup of day",
"description": "many the fish"
}
},
{
"id": 2,
"meta": {
"title": "Soup of day",
"description": "many the lazy fish"
}
}
]
"###);
})
.await;
let (response, code) = index
.update_settings(json!({
"searchableAttributes": ["meta.title"]
}))
.await;
assert_eq!("202", code.as_str(), "{response:?}");
server.wait_task(response.uid()).await.succeeded();
// We expect no documents when searching for
// a nested non-searchable field
index
.search(json!({"q": "many fish"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"[]"###);
})
.await;
}

View File

@@ -42,8 +42,16 @@ async fn version_too_old() {
std::fs::create_dir_all(&db_path).unwrap();
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.28.1");
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err().to_string();
let major = meilisearch_types::versioning::VERSION_MAJOR;
let minor = meilisearch_types::versioning::VERSION_MINOR;
let patch = meilisearch_types::versioning::VERSION_PATCH;
let current_version = format!("{major}.{minor}.{patch}");
let err = err.replace(&current_version, "[current version]");
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v[current version]");
}
#[actix_rt::test]
@@ -54,11 +62,21 @@ async fn version_requires_downgrade() {
std::fs::create_dir_all(&db_path).unwrap();
let major = meilisearch_types::versioning::VERSION_MAJOR;
let minor = meilisearch_types::versioning::VERSION_MINOR;
let patch = meilisearch_types::versioning::VERSION_PATCH + 1;
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let mut patch = meilisearch_types::versioning::VERSION_PATCH;
let current_version = format!("{major}.{minor}.{patch}");
patch += 1;
let future_version = format!("{major}.{minor}.{patch}");
std::fs::write(db_path.join("VERSION"), &future_version).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.28.2 is higher than the Meilisearch version 1.28.1. Downgrade is not supported");
let err = err.to_string();
let err = err.replace(&current_version, "[current version]");
let err = err.replace(&future_version, "[future version]");
snapshot!(err, @"Database version [future version] is higher than the Meilisearch version [current version]. Downgrade is not supported");
}
#[actix_rt::test]

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.1"
"upgradeTo": "[current version]"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.1"
"upgradeTo": "[current version]"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.1"
"upgradeTo": "[current version]"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.1"
"upgradeTo": "[current version]"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.1"
"upgradeTo": "[current version]"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.1"
"upgradeTo": "[current version]"
},
"error": null,
"duration": "[duration]",

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.1"
"upgradeTo": "[current version]"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.1"
"upgradeTo": "[current version]"
},
"error": null,
"duration": "[duration]",

View File

@@ -166,55 +166,55 @@ async fn check_the_index_scheduler(server: &Server) {
// We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration.
// The other tasks should NOT change
let (tasks, _) = server.tasks_filter("limit=1000").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "the_whole_task_queue_once_everything_has_been_processed");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "the_whole_task_queue_once_everything_has_been_processed");
let (batches, _) = server.batches_filter("limit=1000").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "the_whole_batch_queue_once_everything_has_been_processed");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "the_whole_batch_queue_once_everything_has_been_processed");
// Tests all the tasks query parameters
let (tasks, _) = server.tasks_filter("uids=10").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_uids_equal_10");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_uids_equal_10");
let (tasks, _) = server.tasks_filter("batchUids=10").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_batchUids_equal_10");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_batchUids_equal_10");
let (tasks, _) = server.tasks_filter("statuses=canceled").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_statuses_equal_canceled");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_statuses_equal_canceled");
// types has already been tested above to retrieve the upgrade database
let (tasks, _) = server.tasks_filter("canceledBy=19").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_canceledBy_equal_19");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_canceledBy_equal_19");
let (tasks, _) = server.tasks_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
// Tests all the batches query parameters
let (batches, _) = server.batches_filter("uids=10").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
let (batches, _) = server.batches_filter("batchUids=10").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
let (batches, _) = server.batches_filter("statuses=canceled").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
// types has already been tested above to retrieve the upgrade database
let (batches, _) = server.batches_filter("canceledBy=19").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, {

View File

@@ -104,8 +104,8 @@ async fn binary_quantize_before_sending_documents() {
"manual": {
"embeddings": [
[
-1.0,
-1.0,
0.0,
0.0,
1.0
]
],
@@ -122,7 +122,7 @@ async fn binary_quantize_before_sending_documents() {
[
1.0,
1.0,
-1.0
0.0
]
],
"regenerate": false
@@ -191,8 +191,8 @@ async fn binary_quantize_after_sending_documents() {
"manual": {
"embeddings": [
[
-1.0,
-1.0,
0.0,
0.0,
1.0
]
],
@@ -209,7 +209,7 @@ async fn binary_quantize_after_sending_documents() {
[
1.0,
1.0,
-1.0
0.0
]
],
"regenerate": false

View File

@@ -0,0 +1,43 @@
use meili_snap::snapshot;
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
#[actix_rt::test]
async fn hf_bge_m3_force_cls_settings() {
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index
.update_settings(json!({
"embedders": {
"default": {
"source": "huggingFace",
"model": "baai/bge-m3",
"revision": "5617a9f61b028005a4858fdac845db406aefb181",
"pooling": "forceCls",
// minimal template to allow potential document embedding if used later
"documentTemplate": "{{doc.title}}"
}
}
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// Try to embed one simple document
let (task, code) =
index.add_documents(json!([{ "id": 1, "title": "Hello world" }]), None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(task.uid()).await.succeeded();
// Retrieve the document with vectors and assert embeddings were produced
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
let has_vectors = documents["results"][0]["_vectors"]["default"]["embeddings"]
.as_array()
.map(|a| !a.is_empty())
.unwrap_or(false);
snapshot!(has_vectors, @"true");
}

View File

@@ -1,5 +1,6 @@
mod binary_quantized;
mod fragments;
mod huggingface;
#[cfg(feature = "test-ollama")]
mod ollama;
mod openai;

View File

@@ -500,13 +500,6 @@ async fn test_both_apis() {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
@@ -527,6 +520,13 @@ async fn test_both_apis() {
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
@@ -540,13 +540,6 @@ async fn test_both_apis() {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
@@ -567,6 +560,13 @@ async fn test_both_apis() {
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
@@ -581,18 +581,11 @@ async fn test_both_apis() {
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
@@ -602,11 +595,18 @@ async fn test_both_apis() {
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
@@ -621,18 +621,11 @@ async fn test_both_apis() {
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
@@ -642,11 +635,18 @@ async fn test_both_apis() {
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
@@ -661,18 +661,11 @@ async fn test_both_apis() {
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
@@ -682,11 +675,18 @@ async fn test_both_apis() {
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
@@ -701,18 +701,11 @@ async fn test_both_apis() {
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
@@ -722,11 +715,18 @@ async fn test_both_apis() {
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);

View File

@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
"sync",
] }
arroy = "0.6.4-nested-rtxns"
hannoy = { version = "0.0.9-nested-rtxns-2", features = ["arroy"] }
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }
@@ -120,14 +120,16 @@ twox-hash = { version = "2.1.2", default-features = false, features = [
] }
geo-types = "0.7.17"
zerometry = "0.3.0"
bitpacking = "0.9.2"
[dev-dependencies]
mimalloc = { version = "0.1.48", default-features = false }
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
mimalloc = { version = "0.1.48", default-features = false }
maplit = "1.0.2"
md5 = "0.8.0"
meili-snap = { path = "../meili-snap" }
quickcheck = "1.0.3"
rand = { version = "0.8.5", features = ["small_rng"] }
[features]

View File

@@ -18,6 +18,8 @@ use crate::{
pub struct Metadata {
/// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable.
pub searchable: Option<Weight>,
/// The field is part of the exact attributes.
pub exact: bool,
/// The field is part of the sortable attributes.
pub sortable: bool,
/// The field is defined as the distinct attribute.
@@ -209,6 +211,7 @@ impl Metadata {
#[derive(Debug, Clone)]
pub struct MetadataBuilder {
searchable_attributes: Option<Vec<String>>,
exact_searchable_attributes: Vec<String>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
@@ -220,15 +223,18 @@ impl MetadataBuilder {
pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> {
let searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(|s| s.to_string()).collect());
.map(|fields| fields.into_iter().map(String::from).collect());
let exact_searchable_attributes =
index.exact_attributes(rtxn)?.into_iter().map(String::from).collect();
let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
let sortable_attributes = index.sortable_fields(rtxn)?;
let localized_attributes = index.localized_attributes_rules(rtxn)?;
let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
let distinct_attribute = index.distinct_field(rtxn)?.map(String::from);
let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
Ok(Self::new(
searchable_attributes,
exact_searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
@@ -242,6 +248,7 @@ impl MetadataBuilder {
/// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
pub fn new(
searchable_attributes: Option<Vec<String>>,
exact_searchable_attributes: Vec<String>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
@@ -256,6 +263,7 @@ impl MetadataBuilder {
Self {
searchable_attributes,
exact_searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
@@ -269,6 +277,7 @@ impl MetadataBuilder {
// Vectors fields are not searchable, filterable, distinct or asc_desc
return Metadata {
searchable: None,
exact: false,
sortable: false,
distinct: false,
asc_desc: false,
@@ -296,6 +305,7 @@ impl MetadataBuilder {
// Geo fields are not searchable, distinct or asc_desc
return Metadata {
searchable: None,
exact: false,
sortable,
distinct: false,
asc_desc: false,
@@ -309,6 +319,7 @@ impl MetadataBuilder {
debug_assert!(!sortable, "geojson fields should not be sortable");
return Metadata {
searchable: None,
exact: false,
sortable,
distinct: false,
asc_desc: false,
@@ -329,6 +340,8 @@ impl MetadataBuilder {
None => Some(0),
};
let exact = self.exact_searchable_attributes.iter().any(|attr| is_faceted_by(field, attr));
let distinct =
self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
let asc_desc = self.asc_desc_attributes.contains(field);
@@ -343,6 +356,7 @@ impl MetadataBuilder {
Metadata {
searchable,
exact,
sortable,
distinct,
asc_desc,

View File

@@ -12,7 +12,7 @@ use roaring::RoaringBitmap;
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
pub use self::ordered_f64_codec::OrderedF64Codec;
use super::StrRefCodec;
use crate::{CboRoaringBitmapCodec, BEU16};
use crate::{DeCboRoaringBitmapCodec, BEU16};
pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
@@ -97,7 +97,7 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![value.size];
CboRoaringBitmapCodec::serialize_into_vec(&value.bitmap, &mut v);
DeCboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v)?;
Ok(Cow::Owned(v))
}
}
@@ -107,7 +107,7 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let size = bytes[0];
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
let bitmap = DeCboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
Ok(FacetGroupValue { size, bitmap })
}
}

View File

@@ -22,7 +22,9 @@ pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap::{
BoRoaringBitmapCodec, DeCboRoaringBitmapCodec, RoaringBitmapCodec, DELTA_ENCODING_STATUS,
};
pub use self::roaring_bitmap_length::{
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};

View File

@@ -19,8 +19,19 @@ pub const THRESHOLD: usize = 7;
pub struct CboRoaringBitmapCodec;
impl CboRoaringBitmapCodec {
/// If the number of items (u32s) to encode is less than or equal to the threshold
/// it means that it would weigh the same or less than the RoaringBitmap
/// header, so we directly encode them using ByteOrder instead.
pub fn bitmap_serialize_as_raw_u32s(roaring: &RoaringBitmap) -> bool {
roaring.len() <= THRESHOLD as u64
}
pub fn bytes_deserialize_as_raw_u32s(bytes: &[u8]) -> bool {
bytes.len() <= THRESHOLD * size_of::<u32>()
}
pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
if roaring.len() <= THRESHOLD as u64 {
if Self::bitmap_serialize_as_raw_u32s(roaring) {
roaring.len() as usize * size_of::<u32>()
} else {
roaring.serialized_size()
@@ -35,10 +46,7 @@ impl CboRoaringBitmapCodec {
roaring: &RoaringBitmap,
mut writer: W,
) -> io::Result<()> {
if roaring.len() <= THRESHOLD as u64 {
// If the number of items (u32s) to encode is less than or equal to the threshold
// it means that it would weigh the same or less than the RoaringBitmap
// header, so we directly encode them using ByteOrder instead.
if Self::bitmap_serialize_as_raw_u32s(roaring) {
for integer in roaring {
writer.write_u32::<NativeEndian>(integer)?;
}
@@ -51,7 +59,7 @@ impl CboRoaringBitmapCodec {
}
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
if bytes.len() <= THRESHOLD * size_of::<u32>() {
if Self::bytes_deserialize_as_raw_u32s(bytes) {
// If there is threshold or less than threshold integers that can fit into this array
// of bytes it means that we used the ByteOrder codec serializer.
let mut bitmap = RoaringBitmap::new();
@@ -71,7 +79,7 @@ impl CboRoaringBitmapCodec {
other: &RoaringBitmap,
) -> io::Result<RoaringBitmap> {
// See above `deserialize_from` method for implementation details.
if bytes.len() <= THRESHOLD * size_of::<u32>() {
if Self::bytes_deserialize_as_raw_u32s(bytes) {
let mut bitmap = RoaringBitmap::new();
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
if other.contains(integer) {
@@ -98,7 +106,7 @@ impl CboRoaringBitmapCodec {
let mut vec = Vec::new();
for bytes in slices {
if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
if Self::bytes_deserialize_as_raw_u32s(bytes.as_ref()) {
let mut reader = bytes.as_ref();
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
vec.push(integer);
@@ -112,6 +120,8 @@ impl CboRoaringBitmapCodec {
vec.sort_unstable();
vec.dedup();
// Be careful when modifying this condition,
// the rule must be the same everywhere
if vec.len() <= THRESHOLD {
for integer in vec {
buffer.extend_from_slice(&integer.to_ne_bytes());

View File

@@ -0,0 +1,242 @@
use std::borrow::Cow;
use std::io::{self, Cursor, ErrorKind};
use std::sync::OnceLock;
use byteorder::{NativeEndian, ReadBytesExt as _};
use heed::BoxedError;
use roaring::RoaringBitmap;
use super::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
use super::de_roaring_bitmap_codec::DeRoaringBitmapCodec;
use crate::heed_codec::BytesDecodeOwned;
use crate::update::del_add::KvReaderDelAdd;
/// Defines the status of the delta encoding on whether we have enabled it or not.
pub static DELTA_ENCODING_STATUS: DeltaEncodingStatusLock = DeltaEncodingStatusLock::new();
pub struct DeCboRoaringBitmapCodec;
impl DeCboRoaringBitmapCodec {
pub fn serialized_size_with_tmp_buffer(
bitmap: &RoaringBitmap,
tmp_buffer: &mut Vec<u32>,
) -> usize {
// We are stuck with this format because the CboRoaringBitmapCodec decides to write
// raw and unencoded u32s, without a header when there is at most THRESHOLD elements.
if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
|| DELTA_ENCODING_STATUS.is_disabled()
{
CboRoaringBitmapCodec::serialized_size(bitmap)
} else {
DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, tmp_buffer)
}
}
/// Writes the delta-encoded compressed version of
/// the given roaring bitmap into the provided writer.
pub fn serialize_into<W: io::Write>(bitmap: &RoaringBitmap, writer: &mut W) -> io::Result<()> {
let mut tmp_buffer = Vec::new();
Self::serialize_into_with_tmp_buffer(bitmap, writer, &mut tmp_buffer)
}
/// Same as [Self::serialize_into] but accepts a buffer to avoid allocating one.
///
/// Note that we always serialize the bitmap with the delta-encoded compressed version.
pub fn serialize_into_with_tmp_buffer<W: io::Write>(
bitmap: &RoaringBitmap,
writer: &mut W,
tmp_buffer: &mut Vec<u32>,
) -> io::Result<()> {
// We are stuck with this format because the CboRoaringBitmapCodec decides to write
// raw and unencoded u32s, without a header when there is at most THRESHOLD elements.
if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
|| DELTA_ENCODING_STATUS.is_disabled()
{
CboRoaringBitmapCodec::serialize_into_writer(bitmap, writer)
} else {
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(bitmap, writer, tmp_buffer)
}
}
/// Returns the delta-decoded roaring bitmap from the compressed bytes.
pub fn deserialize_from(compressed: &[u8]) -> io::Result<RoaringBitmap> {
let mut tmp_buffer = Vec::new();
Self::deserialize_from_with_tmp_buffer(compressed, &mut tmp_buffer)
}
/// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
///
/// It tries to decode the input by using the delta-decoded version and
/// if it fails, falls back to the CboRoaringBitmap version.
pub fn deserialize_from_with_tmp_buffer(
input: &[u8],
tmp_buffer: &mut Vec<u32>,
) -> io::Result<RoaringBitmap> {
// The input is too short to be a valid delta-decoded bitmap.
// We fall back to the CboRoaringBitmap version with raw u32s.
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(input) {
return CboRoaringBitmapCodec::deserialize_from(input);
}
match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(input, |_, _| true, tmp_buffer)
{
Ok(bitmap) => Ok(bitmap),
// If the error kind is Other it means that the delta-decoder found
// an invalid magic header. We fall back to the CboRoaringBitmap version.
Err(e) if e.kind() == ErrorKind::Other => {
CboRoaringBitmapCodec::deserialize_from(input)
}
Err(e) => Err(e),
}
}
/// Merge serialized DeCboRoaringBitmaps in a buffer.
///
/// If the merged values length is under the threshold, values are directly
/// serialized in the buffer else a delta-encoded list of integers is created
/// from the values and is serialized in the buffer.
pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
where
I: IntoIterator<Item = A>,
A: AsRef<[u8]>,
{
let mut roaring = RoaringBitmap::new();
let mut vec = Vec::new();
let mut tmp_buffer = Vec::new();
for bytes in slices {
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes.as_ref()) {
let mut reader = bytes.as_ref();
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
vec.push(integer);
}
} else {
roaring |= DeCboRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
bytes.as_ref(),
&mut tmp_buffer,
)?;
}
}
roaring.extend(vec);
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&roaring, buffer, &mut tmp_buffer)?;
Ok(())
}
/// Do an intersection directly with a serialized delta-encoded bitmap.
///
/// When doing the intersection we only need to deserialize the necessary
/// bitmap containers and avoid a lot of unnecessary allocations. We do
/// that by skipping entire delta-encoded blocks when possible to avoid
/// storing them in the bitmap we use for the final intersection.
pub fn intersection_with_serialized(
bytes: &[u8],
other: &RoaringBitmap,
) -> io::Result<RoaringBitmap> {
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes) {
return CboRoaringBitmapCodec::intersection_with_serialized(bytes, other);
}
// TODO move this tmp buffer outside
let mut tmp_buffer = Vec::new();
let filter_block = |first, last| {
// Rank returns the number of elements less than or equal
// to the given value. Doing the difference between the
// ranks of the last and first elements gives the number
// of elements in the range. We don't use the range method
// because the ExactSizeIterator::len method always returns
// usize::MAX.
let last_rank = other.rank(last);
let first_rank = other.rank(first);
last_rank - first_rank != 0
};
match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
bytes,
filter_block,
&mut tmp_buffer,
) {
Ok(bitmap) => Ok(bitmap & other),
// If the error kind is Other it means that the delta-decoder found
// an invalid magic header. We fall back to the CboRoaringBitmap version.
Err(e) if e.kind() == ErrorKind::Other => {
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
}
Err(e) => Err(e),
}
}
pub fn merge_deladd_into<'a>(
deladd: &KvReaderDelAdd,
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> io::Result<Option<&'a [u8]>> {
todo!()
}
}
impl heed::BytesDecode<'_> for DeCboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).map_err(Into::into)
}
}
impl BytesDecodeOwned for DeCboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).map_err(Into::into)
}
}
impl heed::BytesEncode<'_> for DeCboRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
let mut tmp_buffer = Vec::new();
let capacity = Self::serialized_size_with_tmp_buffer(&item, &mut tmp_buffer);
let mut output = Vec::with_capacity(capacity);
Self::serialize_into_with_tmp_buffer(item, &mut output, &mut tmp_buffer)?;
Ok(Cow::Owned(output))
}
}
/// Manages the global status of the delta encoding.
///
/// Whether we must use delta encoding or not when encoding roaring bitmaps.
pub struct DeltaEncodingStatusLock(OnceLock<DeltaEncodingStatus>);
impl DeltaEncodingStatusLock {
pub const fn new() -> Self {
Self(OnceLock::new())
}
}
#[derive(Default)]
enum DeltaEncodingStatus {
Enabled,
#[default]
Disabled,
}
impl DeltaEncodingStatusLock {
pub fn set_to_enabled(&self) -> Result<(), ()> {
self.0.set(DeltaEncodingStatus::Enabled).map_err(drop)
}
pub fn set_to_disabled(&self) -> Result<(), ()> {
self.0.set(DeltaEncodingStatus::Disabled).map_err(drop)
}
pub fn is_enabled(&self) -> bool {
matches!(self.0.get(), Some(DeltaEncodingStatus::Enabled))
}
pub fn is_disabled(&self) -> bool {
!self.is_enabled()
}
}

View File

@@ -0,0 +1,383 @@
use std::io::{self, ErrorKind};
use std::mem::{self, size_of, size_of_val};
use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
use roaring::RoaringBitmap;
/// The magic header for our custom encoding format
const MAGIC_HEADER: u16 = 36869;
pub struct DeRoaringBitmapCodec;
// TODO reintroduce:
// - serialized_size?
// - serialize_into_vec
// - intersection_with_serialized
// - merge_into
// - merge_deladd_into
impl DeRoaringBitmapCodec {
/// Returns the serialized size of the given roaring bitmap with the delta encoding format.
pub fn serialized_size_with_tmp_buffer(
bitmap: &RoaringBitmap,
tmp_buffer: &mut Vec<u32>,
) -> usize {
let mut size = 2; // u16 magic header
let bitpacker8x = BitPacker8x::new();
let bitpacker4x = BitPacker4x::new();
let bitpacker1x = BitPacker1x::new();
// This temporary buffer is used to store each chunk of decompressed u32s.
tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
let decompressed = &mut tmp_buffer[..];
let mut buffer_index = 0;
let mut initial = None;
// We initially collect all the integers into a flat buffer of the size
// of the largest bitpacker. We encode them with it until we don't have
// enough of them...
for n in bitmap {
decompressed[buffer_index] = n;
buffer_index += 1;
if buffer_index == BitPacker8x::BLOCK_LEN {
let num_bits = bitpacker8x.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = BitPacker8x::compressed_block_size(num_bits);
size += 1; // u8 chunk header
size += compressed_len; // compressed data length
initial = Some(n);
buffer_index = 0;
}
}
// ...We then switch to a smaller bitpacker to encode the remaining chunks...
let decompressed = &decompressed[..buffer_index];
let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let num_bits = bitpacker4x.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = BitPacker4x::compressed_block_size(num_bits);
size += 1; // u8 chunk header
size += compressed_len; // compressed data length
initial = decompressed.iter().last().copied();
}
// ...And so on...
let decompressed = chunks.remainder();
let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let num_bits = bitpacker1x.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = BitPacker1x::compressed_block_size(num_bits);
size += 1; // u8 chunk header
size += compressed_len; // compressed data length
initial = decompressed.iter().last().copied();
}
// ...Until we don't have any small enough bitpacker. We put them raw
// at the end of out buffer with a header indicating the matter.
let decompressed = chunks.remainder();
if !decompressed.is_empty() {
size += 1; // u8 chunk header
size += mem::size_of_val(decompressed); // remaining uncompressed u32s
}
size
}
/// Writes the delta-encoded compressed version of the given roaring bitmap
/// into the provided writer. Accepts a buffer to avoid allocating one.
pub fn serialize_into_with_tmp_buffer<W: io::Write>(
bitmap: &RoaringBitmap,
mut writer: W,
tmp_buffer: &mut Vec<u32>,
) -> io::Result<()> {
// Insert the magic header
writer.write_all(&MAGIC_HEADER.to_ne_bytes())?;
let bitpacker8x = BitPacker8x::new();
let bitpacker4x = BitPacker4x::new();
let bitpacker1x = BitPacker1x::new();
// This temporary buffer is used to store each chunk of decompressed and
// compressed and delta-encoded u32s. We need room for the decompressed
// u32s coming from the roaring bitmap, the compressed output that can
// be as large as the decompressed u32s, and the chunk header.
tmp_buffer.resize((BitPacker8x::BLOCK_LEN * 2) + 1, 0u32);
let (decompressed, compressed) = tmp_buffer.split_at_mut(BitPacker8x::BLOCK_LEN);
let compressed = bytemuck::cast_slice_mut(compressed);
let mut buffer_index = 0;
let mut initial = None;
// We initially collect all the integers into a flat buffer of the size
// of the largest bitpacker. We encode them with it until we don't have
// enough of them...
for n in bitmap {
decompressed[buffer_index] = n;
buffer_index += 1;
if buffer_index == BitPacker8x::BLOCK_LEN {
let output = encode_with_packer(&bitpacker8x, decompressed, initial, compressed);
writer.write_all(output)?;
initial = Some(n);
buffer_index = 0;
}
}
// ...We then switch to a smaller bitpacker to encode the remaining chunks...
let decompressed = &decompressed[..buffer_index];
let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let output = encode_with_packer(&bitpacker4x, decompressed, initial, compressed);
writer.write_all(output)?;
initial = decompressed.iter().last().copied();
}
// ...And so on...
let decompressed = chunks.remainder();
let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let output = encode_with_packer(&bitpacker1x, decompressed, initial, compressed);
writer.write_all(output)?;
initial = decompressed.iter().last().copied();
}
// ...Until we don't have any small enough bitpacker. We put them raw
// at the end of out buffer with a header indicating the matter.
let decompressed = chunks.remainder();
if !decompressed.is_empty() {
let header = encode_chunk_header(BitPackerLevel::None, u32::BITS as u8);
// Note: Not convinced about the performance of writing a single
// byte followed by a larger write. However, we will use this
// codec with a BufWriter or directly with a Vec of bytes.
writer.write_all(&[header])?;
writer.write_all(bytemuck::cast_slice(decompressed))?;
}
Ok(())
}
/// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
///
/// The `filter_block` function is used to filter out blocks. It takes the first
/// and last u32 values of a block and returns `true` if the block must be kept.
pub fn deserialize_from_with_tmp_buffer<F>(
input: &[u8],
filter_block: F,
tmp_buffer: &mut Vec<u32>,
) -> io::Result<RoaringBitmap>
where
F: Fn(u32, u32) -> bool,
{
let Some((header, mut compressed)) = input.split_at_checked(size_of_val(&MAGIC_HEADER))
else {
return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
};
// Safety: This unwrap cannot happen as the header buffer is the right size
let header = u16::from_ne_bytes(header.try_into().unwrap());
if header != MAGIC_HEADER {
return Err(io::Error::other("invalid header value"));
}
let bitpacker8x = BitPacker8x::new();
let bitpacker4x = BitPacker4x::new();
let bitpacker1x = BitPacker1x::new();
let mut bitmap = RoaringBitmap::new();
tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
let decompressed = &mut tmp_buffer[..];
let mut initial = None;
while let Some((&chunk_header, encoded)) = compressed.split_first() {
let (level, num_bits) = decode_chunk_header(chunk_header);
let (bytes_read, decompressed) = match level {
BitPackerLevel::None => {
if num_bits != u32::BITS as u8 {
return Err(io::Error::new(
ErrorKind::InvalidData,
"invalid number of bits to encode non-compressed u32s",
));
}
let chunks = encoded.chunks_exact(size_of::<u32>());
if !chunks.remainder().is_empty() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"expecting last chunk to be a multiple of the size of an u32",
));
}
let integers = chunks
// safety: This unwrap cannot happen as
// the size of u32 is set correctly.
.map(|b| b.try_into().unwrap())
.map(u32::from_ne_bytes);
if let Some((first, last)) =
integers.clone().next().zip(integers.clone().last())
{
if !(filter_block)(first, last) {
bitmap
.append(integers)
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?;
}
}
// This is basically always the last chunk that exists in
// this delta-encoded format as the raw u32s are appended
// when there is not enough of them to fit in a bitpacker.
break;
}
BitPackerLevel::BitPacker1x => {
decode_with_packer(&bitpacker1x, decompressed, initial, encoded, num_bits)
}
BitPackerLevel::BitPacker4x => {
decode_with_packer(&bitpacker4x, decompressed, initial, encoded, num_bits)
}
BitPackerLevel::BitPacker8x => {
decode_with_packer(&bitpacker8x, decompressed, initial, encoded, num_bits)
}
};
initial = decompressed.iter().last().copied();
if let Some((first, last)) = decompressed.first().copied().zip(initial) {
if !(filter_block)(first, last) {
// TODO investigate perf
// Safety: Bitpackers cannot output unsorter integers when
// used with the compress_strictly_sorted function.
bitmap.append(decompressed.iter().copied()).unwrap();
}
}
// What the delta-decoding read plus the chunk header size
compressed = &compressed[bytes_read + 1..];
}
Ok(bitmap)
}
}
/// Takes a strickly sorted list of u32s and outputs delta-encoded
/// bytes with a chunk header. We expect the output buffer to be
/// at least BLOCK_LEN + 1.
fn encode_with_packer<'c, B: BitPackerExt>(
bitpacker: &B,
decompressed: &[u32],
initial: Option<u32>,
output: &'c mut [u8],
) -> &'c [u8] {
let num_bits = bitpacker.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = B::compressed_block_size(num_bits);
let chunk_header = encode_chunk_header(B::level(), num_bits);
let buffer = &mut output[..compressed_len + 1];
// Safety: The buffer is at least one byte
let (header_in_buffer, encoded) = buffer.split_first_mut().unwrap();
*header_in_buffer = chunk_header;
bitpacker.compress_strictly_sorted(initial, decompressed, encoded, num_bits);
buffer
}
/// Returns the number of bytes read and the decoded unsigned integers.
fn decode_with_packer<'d, B: BitPacker>(
bitpacker: &B,
decompressed: &'d mut [u32],
initial: Option<u32>,
compressed: &[u8],
num_bits: u8,
) -> (usize, &'d [u32]) {
let decompressed = &mut decompressed[..B::BLOCK_LEN];
let read = bitpacker.decompress_strictly_sorted(initial, compressed, decompressed, num_bits);
(read, decompressed)
}
/// An identifier for the bitpacker to be able
/// to correctly decode the compressed integers.
#[derive(Debug, PartialEq, Eq)]
#[repr(u8)]
enum BitPackerLevel {
/// The remaining bytes are raw little endian encoded u32s.
None,
/// The remaining bits are encoded using a `BitPacker1x`.
BitPacker1x,
/// The remaining bits are encoded using a `BitPacker4x`.
BitPacker4x,
/// The remaining bits are encoded using a `BitPacker8x`.
BitPacker8x,
}
/// Returns the chunk header based on the bitpacker level
/// and the number of bits to encode the list of integers.
fn encode_chunk_header(level: BitPackerLevel, num_bits: u8) -> u8 {
debug_assert!(num_bits as u32 <= 2_u32.pow(6));
let level = level as u8;
debug_assert!(level <= 3);
num_bits | (level << 6)
}
/// Decodes the chunk header and output the bitpacker level
/// and the number of bits to decode the following bytes.
fn decode_chunk_header(data: u8) -> (BitPackerLevel, u8) {
let num_bits = data & 0b00111111;
let level = match data >> 6 {
0 => BitPackerLevel::None,
1 => BitPackerLevel::BitPacker1x,
2 => BitPackerLevel::BitPacker4x,
3 => BitPackerLevel::BitPacker8x,
invalid => panic!("Invalid bitpacker level: {invalid}"),
};
debug_assert!(num_bits as u32 <= 2_u32.pow(6));
(level, num_bits)
}
/// A simple helper trait to get the BitPackerLevel
/// and correctly generate the chunk header.
trait BitPackerExt: BitPacker {
/// Returns the level of the bitpacker: an identifier to be
/// able to decode the numbers with the right bitpacker.
fn level() -> BitPackerLevel;
}
impl BitPackerExt for BitPacker8x {
fn level() -> BitPackerLevel {
BitPackerLevel::BitPacker8x
}
}
impl BitPackerExt for BitPacker4x {
fn level() -> BitPackerLevel {
BitPackerLevel::BitPacker4x
}
}
impl BitPackerExt for BitPacker1x {
fn level() -> BitPackerLevel {
BitPackerLevel::BitPacker1x
}
}
#[cfg(test)]
mod tests {
use quickcheck::quickcheck;
use roaring::RoaringBitmap;
use super::DeRoaringBitmapCodec;
quickcheck! {
fn qc_random(xs: Vec<u32>) -> bool {
let bitmap = RoaringBitmap::from_iter(xs);
let mut compressed = Vec::new();
let mut tmp_buffer = Vec::new();
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
let decompressed = DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(&compressed[..], |_, _| true, &mut tmp_buffer).unwrap();
decompressed == bitmap
}
}
quickcheck! {
fn qc_random_check_serialized_size(xs: Vec<u32>) -> bool {
let bitmap = RoaringBitmap::from_iter(xs);
let mut compressed = Vec::new();
let mut tmp_buffer = Vec::new();
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
let expected_len = DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(&bitmap, &mut tmp_buffer);
compressed.len() == expected_len
}
}
}

View File

@@ -1,7 +1,10 @@
mod bo_roaring_bitmap_codec;
pub mod cbo_roaring_bitmap_codec;
pub mod de_cbo_roaring_bitmap_codec;
mod de_roaring_bitmap_codec;
mod roaring_bitmap_codec;
pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
// pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
pub use self::de_cbo_roaring_bitmap_codec::{DeCboRoaringBitmapCodec, DELTA_ENCODING_STATUS};
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;

View File

@@ -34,7 +34,7 @@ use crate::update::new::StdResult;
use crate::vector::db::IndexEmbeddingConfigs;
use crate::vector::{Embedding, VectorStore, VectorStoreBackend, VectorStoreStats};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
default_criteria, Criterion, DeCboRoaringBitmapCodec, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec,
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32,
@@ -133,38 +133,38 @@ pub struct Index {
pub external_documents_ids: Database<Str, BEU32>,
/// A word and all the documents ids containing the word.
pub word_docids: Database<Str, CboRoaringBitmapCodec>,
pub word_docids: Database<Str, DeCboRoaringBitmapCodec>,
/// A word and all the documents ids containing the word, from attributes for which typos are not allowed.
pub exact_word_docids: Database<Str, CboRoaringBitmapCodec>,
pub exact_word_docids: Database<Str, DeCboRoaringBitmapCodec>,
/// A prefix of word and all the documents ids containing this prefix.
pub word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
pub word_prefix_docids: Database<Str, DeCboRoaringBitmapCodec>,
/// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
pub exact_word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
pub exact_word_prefix_docids: Database<Str, DeCboRoaringBitmapCodec>,
/// Maps the proximity between a pair of words with all the docids where this relation appears.
pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
pub word_pair_proximity_docids: Database<U8StrStrCodec, DeCboRoaringBitmapCodec>,
/// Maps the word and the position with the docids that corresponds to it.
pub word_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
pub word_position_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
/// Maps the word and the field id with the docids that corresponds to it.
pub word_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
pub word_fid_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
/// Maps the field id and the word count with the docids that corresponds to it.
pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
pub field_id_word_count_docids: Database<FieldIdWordCountCodec, DeCboRoaringBitmapCodec>,
/// Maps the word prefix and a position with all the docids where the prefix appears at the position.
pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
pub word_prefix_position_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
/// Maps the word prefix and a field id with all the docids where the prefix appears inside the field
pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
pub word_prefix_fid_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
/// Maps the facet field id and the docids for which this field exists
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
pub facet_id_exists_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
/// Maps the facet field id and the docids for which this field is set as null
pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
pub facet_id_is_null_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
/// Maps the facet field id and the docids for which this field is considered empty
pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
pub facet_id_is_empty_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
/// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
@@ -281,6 +281,9 @@ impl Index {
&mut wtxn,
(constants::VERSION_MAJOR, constants::VERSION_MINOR, constants::VERSION_PATCH),
)?;
// The database before v1.29 defaulted to using arroy, so we
// need to set it explicitly because the new default is hannoy.
this.put_vector_store(&mut wtxn, VectorStoreBackend::Hannoy)?;
}
wtxn.commit()?;

View File

@@ -73,7 +73,7 @@ pub use self::filterable_attributes_rules::{
};
pub use self::heed_codec::{
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
CboRoaringBitmapLenCodec, DeCboRoaringBitmapCodec, FieldIdWordCountCodec, ObkvCodec,
RoaringBitmapCodec, RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec,
UncheckedU8StrStrCodec,
};

View File

@@ -10,7 +10,7 @@ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::{CboRoaringBitmapCodec, DocumentId};
use crate::{DeCboRoaringBitmapCodec, DocumentId};
/// Call the given closure on the facet distribution of the candidate documents.
///
@@ -88,7 +88,7 @@ where
if key.field_id != field_id {
break;
}
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
let intersection = DeCboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
@@ -120,7 +120,7 @@ where
if key.field_id != field_id {
break;
}
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
let intersection = DeCboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
@@ -173,7 +173,7 @@ where
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
let docids_in_common = DeCboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
@@ -210,7 +210,7 @@ where
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
let docids_in_common = DeCboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;

View File

@@ -8,7 +8,7 @@ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::{CboRoaringBitmapCodec, Result};
use crate::{DeCboRoaringBitmapCodec, Result};
/// Find all the document ids for which the given field contains a value contained within
/// the two bounds.
@@ -114,11 +114,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
None => DeCboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
};
}
}
@@ -211,11 +211,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
};
if should_take_whole_group {
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
None => DeCboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
previous_key = next_key;
previous_value = next_value;
@@ -313,11 +313,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
};
if should_take_whole_group {
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
None => DeCboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
} else {
let level = level - 1;

View File

@@ -385,9 +385,10 @@ pub struct SearchResult {
pub query_vector: Option<Embedding>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub enum TermsMatchingStrategy {
// remove last word first
#[default]
Last,
// all words are mandatory
All,
@@ -395,12 +396,6 @@ pub enum TermsMatchingStrategy {
Frequency,
}
impl Default for TermsMatchingStrategy {
fn default() -> Self {
Self::Last
}
}
impl From<MatchingStrategy> for TermsMatchingStrategy {
fn from(other: MatchingStrategy) -> Self {
match other {

View File

@@ -14,7 +14,7 @@ use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::proximity::ProximityPrecision;
use crate::update::MergeCboRoaringBitmaps;
use crate::{
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
CboRoaringBitmapLenCodec, DeCboRoaringBitmapCodec, Result, SearchContext, U8StrStrCodec,
};
/// A cache storing pointers to values in the LMDB databases.
@@ -72,11 +72,11 @@ impl<'ctx> DatabaseCache<'ctx> {
match (bitmap_bytes, universe) {
(bytes, Some(universe)) => {
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
.map(Some)
.map_err(Into::into)
}
(bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
(bytes, None) => DeCboRoaringBitmapCodec::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
@@ -157,11 +157,11 @@ impl<'ctx> DatabaseCache<'ctx> {
match (bitmap_bytes, universe) {
(bytes, Some(universe)) => {
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
.map(Some)
.map_err(Into::into)
}
(bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
(bytes, None) => DeCboRoaringBitmapCodec::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
@@ -377,7 +377,7 @@ impl<'ctx> SearchContext<'ctx> {
{
docids
.as_ref()
.map(|d| CboRoaringBitmapCodec::bytes_decode_owned(d))
.map(|d| DeCboRoaringBitmapCodec::bytes_decode_owned(d))
.transpose()
.map_err(heed::Error::Decoding)?
} else {
@@ -395,7 +395,7 @@ impl<'ctx> SearchContext<'ctx> {
docids |= word1_docids & word2_docids;
}
}
let encoded = CboRoaringBitmapCodec::bytes_encode(&docids)
let encoded = DeCboRoaringBitmapCodec::bytes_encode(&docids)
.map(Cow::into_owned)
.map(Cow::Owned)
.map(Some)

View File

@@ -6,7 +6,7 @@ use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::score_details::{self, ScoreDetails};
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm;
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};
use crate::{DeCboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};
/// A ranking rule that produces 3 disjoint buckets:
///
@@ -219,7 +219,7 @@ impl State {
match bitmap_bytes {
Some(bytes) => {
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
}
None => RoaringBitmap::default(),
}

View File

@@ -15,7 +15,7 @@ use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
use crate::{CboRoaringBitmapLenCodec, DeCboRoaringBitmapCodec, FieldId, Index, Result};
/// Algorithm to insert elememts into the `facet_id_(string/f64)_docids` databases
/// by rebuilding the database "from scratch".
@@ -162,7 +162,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
Some(prev_value) => {
// prev_value is the group size for level 0, followed by the previous bitmap.
let old_bitmap = &prev_value[1..];
CboRoaringBitmapCodec::merge_deladd_into(value, old_bitmap, &mut buffer)?;
DeCboRoaringBitmapCodec::merge_deladd_into(value, old_bitmap, &mut buffer)?;
}
None => {
// it is safe to ignore the del in that case.

View File

@@ -16,7 +16,7 @@ use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key;
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{CboRoaringBitmapCodec, Index, Result};
use crate::{DeCboRoaringBitmapCodec, Index, Result};
/// Enum used as a return value for the facet incremental indexing.
///
@@ -112,13 +112,13 @@ impl FacetsUpdateIncremental {
let value = KvReader::from_slice(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(DeCboRoaringBitmapCodec::bytes_decode)
.map(|o| o.map_err(heed::Error::Encoding))
.transpose()?;
let docids_to_add = value
.get(DelAdd::Addition)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(DeCboRoaringBitmapCodec::bytes_decode)
.map(|o| o.map_err(heed::Error::Encoding))
.transpose()?;

View File

@@ -366,7 +366,7 @@ pub(crate) mod test_helpers {
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::MergeDeladdCboRoaringBitmaps;
use crate::update::FacetsUpdateIncrementalInner;
use crate::CboRoaringBitmapCodec;
use crate::DeCboRoaringBitmapCodec;
/// Utility function to generate a string whose position in a lexicographically
/// ordered list is `i`.
@@ -496,7 +496,7 @@ pub(crate) mod test_helpers {
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key).unwrap();
let mut inner_writer = KvWriterDelAdd::memory();
let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
let value = DeCboRoaringBitmapCodec::bytes_encode(docids).unwrap();
inner_writer.insert(DelAdd::Addition, value).unwrap();
writer.insert(&key, inner_writer.into_inner().unwrap()).unwrap();
}

View File

@@ -19,7 +19,7 @@ use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, MAX_FACET_VALUE_LENGTH};
use crate::{DeCboRoaringBitmapCodec, DocumentId, FieldId, Result, MAX_FACET_VALUE_LENGTH};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
@@ -311,8 +311,8 @@ fn deladd_obkv_cbo_roaring_bitmaps(
) -> io::Result<()> {
buffer.clear();
let mut obkv = KvWriterDelAdd::new(buffer);
let del_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
let add_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
let del_bitmap_bytes = DeCboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
let add_bitmap_bytes = DeCboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
obkv.insert(DelAdd::Deletion, del_bitmap_bytes)?;
obkv.insert(DelAdd::Addition, add_bitmap_bytes)?;
obkv.finish()

View File

@@ -124,7 +124,7 @@ impl GrenadParameters {
/// This should be called inside of a rayon thread pool,
/// otherwise, it will take the global number of threads.
pub fn max_memory_by_thread(&self) -> Option<usize> {
self.max_memory.map(|max_memory| (max_memory / rayon::current_num_threads()))
self.max_memory.map(|max_memory| max_memory / rayon::current_num_threads())
}
}

View File

@@ -7,7 +7,7 @@ use either::Either;
use grenad::MergeFunction;
use roaring::RoaringBitmap;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::heed_codec::DeCboRoaringBitmapCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::transform::Operation;
use crate::Result;
@@ -200,7 +200,7 @@ impl MergeFunction for MergeCboRoaringBitmaps {
Ok(values[0].clone())
} else {
let mut vec = Vec::new();
CboRoaringBitmapCodec::merge_into(values, &mut vec)?;
DeCboRoaringBitmapCodec::merge_into(values, &mut vec)?;
Ok(Cow::from(vec))
}
}
@@ -232,10 +232,10 @@ impl MergeFunction for MergeDeladdCboRoaringBitmaps {
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let mut buffer = Vec::new();
CboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
DeCboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?;
buffer.clear();
CboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
DeCboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Addition, &buffer)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
}
@@ -251,7 +251,7 @@ pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> Result<Option<&'a [u8]>> {
Ok(CboRoaringBitmapCodec::merge_deladd_into(
Ok(DeCboRoaringBitmapCodec::merge_deladd_into(
KvReaderDelAdd::from_slice(deladd_obkv),
previous,
buffer,

View File

@@ -40,7 +40,7 @@ use crate::update::{
};
use crate::vector::db::EmbedderInfo;
use crate::vector::{RuntimeEmbedders, VectorStore};
use crate::{CboRoaringBitmapCodec, Index, Result, UserError};
use crate::{DeCboRoaringBitmapCodec, Index, Result, UserError};
static MERGED_DATABASE_COUNT: usize = 7;
static PREFIX_DATABASE_COUNT: usize = 4;
@@ -54,11 +54,12 @@ pub struct DocumentAdditionResult {
pub number_of_documents: u64,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum IndexDocumentsMethod {
/// Replace the previous document with the new one,
/// removing all the already known attributes.
#[default]
ReplaceDocuments,
/// Merge the previous version of the document with the new version,
@@ -66,12 +67,6 @@ pub enum IndexDocumentsMethod {
UpdateDocuments,
}
impl Default for IndexDocumentsMethod {
fn default() -> Self {
Self::ReplaceDocuments
}
}
pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
@@ -769,8 +764,8 @@ where
fn execute_word_prefix_docids(
txn: &mut heed::RwTxn<'_>,
merger: Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>,
word_docids_db: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids_db: Database<Str, CboRoaringBitmapCodec>,
word_docids_db: Database<Str, DeCboRoaringBitmapCodec>,
word_prefix_docids_db: Database<Str, DeCboRoaringBitmapCodec>,
indexer_config: &IndexerConfig,
new_prefix_fst_words: &[String],
common_prefix_fst_words: &[&[String]],
@@ -806,6 +801,10 @@ mod tests {
use crate::vector::db::IndexEmbeddingConfig;
use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError};
fn no_cancel() -> bool {
false
}
#[test]
fn simple_document_replacement() {
let index = TempIndex::new();
@@ -1985,7 +1984,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2038,7 +2037,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2057,7 +2056,7 @@ mod tests {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2127,7 +2126,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2146,7 +2145,7 @@ mod tests {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2317,7 +2316,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2333,7 +2332,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2381,7 +2380,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2397,7 +2396,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2436,7 +2435,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2452,7 +2451,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2490,7 +2489,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2506,7 +2505,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2546,7 +2545,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2562,7 +2561,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2607,7 +2606,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2623,7 +2622,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2661,7 +2660,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2677,7 +2676,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2715,7 +2714,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2731,7 +2730,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2927,7 +2926,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2943,7 +2942,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2988,7 +2987,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -3004,7 +3003,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -3046,7 +3045,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -3062,7 +3061,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)

View File

@@ -29,7 +29,7 @@ use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
use crate::vector::VectorStore;
use crate::{
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
lat_lng_to_xyz, DeCboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec, UserError,
};
@@ -866,7 +866,7 @@ where
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn write_proximity_entries_into_database_additional_searchables<R, MF>(
merger: Merger<R, MF>,
database: &heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
database: &heed::Database<U8StrStrCodec, DeCboRoaringBitmapCodec>,
wtxn: &mut RwTxn<'_>,
) -> Result<()>
where
@@ -881,7 +881,7 @@ where
U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
let data_to_insert = match KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
Some(value) => {
CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
DeCboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
}
None => continue,
};

View File

@@ -27,7 +27,7 @@ use crate::index::db_name;
use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY};
use crate::update::new::KvReaderFieldId;
use crate::vector::Embedding;
use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};
use crate::{DeCboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};
/// Note that the FrameProducer requires up to 9 bytes to
/// encode the length, the max grant has been computed accordingly.
@@ -971,7 +971,9 @@ pub struct WordDocidsSender<'a, 'b, D> {
impl<D: DatabaseType> WordDocidsSender<'_, '_, D> {
pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> {
let value_length = CboRoaringBitmapCodec::serialized_size(bitmap);
let mut tmp_buffer = Vec::new();
let value_length =
DeCboRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, &mut tmp_buffer);
let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| {
InternalError::StorePut {
database_name: D::DATABASE.database_name(),
@@ -986,7 +988,10 @@ impl<D: DatabaseType> WordDocidsSender<'_, '_, D> {
value_length,
|key_buffer, value_buffer| {
key_buffer.copy_from_slice(key);
CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_buffer)?;
DeCboRoaringBitmapCodec::serialize_into(
bitmap,
&mut io::Cursor::new(value_buffer),
)?;
Ok(())
},
)
@@ -1007,7 +1012,9 @@ impl FacetDocidsSender<'_, '_> {
let (facet_kind, key) = FacetKind::extract_from_key(key);
let database = Database::from(facet_kind);
let value_length = CboRoaringBitmapCodec::serialized_size(bitmap);
let mut tmp_buffer = Vec::new();
let value_length =
DeCboRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, &mut tmp_buffer);
let value_length = match facet_kind {
// We must take the facet group size into account
// when we serialize strings and numbers.
@@ -1041,7 +1048,7 @@ impl FacetDocidsSender<'_, '_> {
FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_out,
};
CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?;
DeCboRoaringBitmapCodec::serialize_into(bitmap, &mut io::Cursor::new(value_out))?;
Ok(())
},

View File

@@ -82,7 +82,7 @@ use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::new::thread_local::MostlySend;
use crate::update::new::KvReaderDelAdd;
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{CboRoaringBitmapCodec, Result};
use crate::{DeCboRoaringBitmapCodec, Result};
/// A cache that stores bytes keys associated to CboDelAddRoaringBitmaps.
///
@@ -323,6 +323,7 @@ struct SpillingCaches<'extractor> {
spilled_entries: Vec<grenad::Sorter<MergeDeladdCboRoaringBitmaps>>,
deladd_buffer: Vec<u8>,
cbo_buffer: Vec<u8>,
tmp_buffer: Vec<u32>,
}
impl<'extractor> SpillingCaches<'extractor> {
@@ -348,6 +349,7 @@ impl<'extractor> SpillingCaches<'extractor> {
caches,
deladd_buffer: Vec::new(),
cbo_buffer: Vec::new(),
tmp_buffer: Vec::new(),
}
}
@@ -370,6 +372,7 @@ impl<'extractor> SpillingCaches<'extractor> {
&mut self.spilled_entries[bucket],
&mut self.deladd_buffer,
&mut self.cbo_buffer,
&mut self.tmp_buffer,
key,
DelAddRoaringBitmap::new_del_u32(n),
),
@@ -395,6 +398,7 @@ impl<'extractor> SpillingCaches<'extractor> {
&mut self.spilled_entries[bucket],
&mut self.deladd_buffer,
&mut self.cbo_buffer,
&mut self.tmp_buffer,
key,
DelAddRoaringBitmap::new_add_u32(n),
),
@@ -411,6 +415,7 @@ fn spill_entry_to_sorter(
spilled_entries: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
deladd_buffer: &mut Vec<u8>,
cbo_buffer: &mut Vec<u8>,
tmp_buffer: &mut Vec<u32>,
key: &[u8],
deladd: DelAddRoaringBitmap,
) -> Result<()> {
@@ -420,21 +425,21 @@ fn spill_entry_to_sorter(
match deladd {
DelAddRoaringBitmap { del: Some(del), add: None } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&del, cbo_buffer, tmp_buffer)?;
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: Some(add) } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&add, cbo_buffer, tmp_buffer)?;
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: Some(del), add: Some(add) } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&del, cbo_buffer, tmp_buffer)?;
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&add, cbo_buffer, tmp_buffer)?;
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: None } => return Ok(()),
@@ -640,12 +645,12 @@ impl DelAddRoaringBitmap {
let reader = KvReaderDelAdd::from_slice(bytes);
let del = match reader.get(DelAdd::Deletion) {
Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
Some(bytes) => DeCboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
None => None,
};
let add = match reader.get(DelAdd::Addition) {
Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
Some(bytes) => DeCboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
None => None,
};

View File

@@ -8,17 +8,26 @@ use bumpalo::Bump;
use super::match_searchable_field;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::fields_ids_map::metadata::Metadata;
use crate::update::new::document::DocumentContext;
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::extract::perm_json_p::contained_in;
use crate::update::new::extract::searchable::has_searchable_children;
use crate::update::new::indexer::document_changes::{
extract, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::indexer::settings_changes::{
settings_change_extract, DocumentsIndentifiers, SettingsChangeExtractor,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::{bucketed_position, DocumentId, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE};
use crate::update::new::{DocumentChange, DocumentIdentifiers};
use crate::update::settings::SettingsDelta;
use crate::{
bucketed_position, DocumentId, FieldId, PatternMatch, Result, UserError,
MAX_POSITION_PER_ATTRIBUTE,
};
const MAX_COUNTED_WORDS: usize = 30;
@@ -34,6 +43,15 @@ pub struct WordDocidsBalancedCaches<'extractor> {
unsafe impl MostlySend for WordDocidsBalancedCaches<'_> {}
/// Whether to extract or skip fields during word extraction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FieldDbExtraction {
/// Extract the word and put it in to the fid-based databases.
Extract,
/// Do not store the word in the fid-based databases.
Skip,
}
impl<'extractor> WordDocidsBalancedCaches<'extractor> {
pub fn new_in(buckets: usize, max_memory: Option<usize>, alloc: &'extractor Bump) -> Self {
Self {
@@ -47,12 +65,14 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
}
}
#[allow(clippy::too_many_arguments)]
fn insert_add_u32(
&mut self,
field_id: FieldId,
position: u16,
word: &str,
exact: bool,
field_db_extraction: FieldDbExtraction,
docid: u32,
bump: &Bump,
) -> Result<()> {
@@ -66,11 +86,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_add_u32(&buffer, docid)?;
if field_db_extraction == FieldDbExtraction::Extract {
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_add_u32(&buffer, docid)?;
}
let position = bucketed_position(position);
buffer.clear();
@@ -83,21 +105,26 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.flush_fid_word_count(&mut buffer)?;
}
self.fid_word_count
.entry(field_id)
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
.or_insert((None, Some(1)));
if field_db_extraction == FieldDbExtraction::Extract {
self.fid_word_count
.entry(field_id)
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
.or_insert((None, Some(1)));
}
self.current_docid = Some(docid);
Ok(())
}
#[allow(clippy::too_many_arguments)]
fn insert_del_u32(
&mut self,
field_id: FieldId,
position: u16,
word: &str,
exact: bool,
field_db_extraction: FieldDbExtraction,
docid: u32,
bump: &Bump,
) -> Result<()> {
@@ -111,11 +138,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_del_u32(&buffer, docid)?;
if field_db_extraction == FieldDbExtraction::Extract {
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_del_u32(&buffer, docid)?;
}
let position = bucketed_position(position);
buffer.clear();
@@ -128,10 +157,12 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.flush_fid_word_count(&mut buffer)?;
}
self.fid_word_count
.entry(field_id)
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
.or_insert((Some(1), None));
if field_db_extraction == FieldDbExtraction::Extract {
self.fid_word_count
.entry(field_id)
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
.or_insert((Some(1), None));
}
self.current_docid = Some(docid);
@@ -325,6 +356,24 @@ impl WordDocidsExtractors {
exact_attributes.iter().any(|attr| contained_in(fname, attr))
|| disabled_typos_terms.is_exact(word)
};
let mut should_tokenize = |field_name: &str| {
let Some((field_id, meta)) = new_fields_ids_map.id_with_metadata_or_insert(field_name)
else {
return Err(UserError::AttributeLimitReached.into());
};
let pattern_match = if meta.is_searchable() {
PatternMatch::Match
} else {
// TODO: should be a match on the field_name using `match_field_legacy` function,
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
PatternMatch::Parent
};
Ok((field_id, pattern_match))
};
match document_change {
DocumentChange::Deletion(inner) => {
let mut token_fn = |fname: &str, fid, pos, word: &str| {
@@ -333,13 +382,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index, context.db_fields_ids_map)?,
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
}
@@ -361,13 +411,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index, context.db_fields_ids_map)?,
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
@@ -377,13 +428,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.merged(rtxn, index, context.db_fields_ids_map)?,
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
}
@@ -394,13 +446,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.inserted(),
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
}
@@ -411,3 +464,292 @@ impl WordDocidsExtractors {
cached_sorter.flush_fid_word_count(&mut buffer)
}
}
pub struct WordDocidsSettingsExtractorsData<'a, SD> {
tokenizer: DocumentTokenizer<'a>,
max_memory_by_thread: Option<usize>,
buckets: usize,
settings_delta: &'a SD,
}
impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
for WordDocidsSettingsExtractorsData<'_, SD>
{
type Data = RefCell<Option<WordDocidsBalancedCaches<'extractor>>>;
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in(
self.buckets,
self.max_memory_by_thread,
extractor_alloc,
))))
}
fn process<'doc>(
&'doc self,
documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
context: &'doc DocumentContext<Self::Data>,
) -> crate::Result<()> {
for document in documents {
let document = document?;
SettingsChangeWordDocidsExtractors::extract_document_from_settings_change(
document,
context,
&self.tokenizer,
self.settings_delta,
)?;
}
Ok(())
}
}
pub struct SettingsChangeWordDocidsExtractors;
impl SettingsChangeWordDocidsExtractors {
pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
settings_delta: &SD,
documents: &'indexer DocumentsIndentifiers<'indexer>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<WordDocidsCaches<'extractor>>
where
SD: SettingsDelta + Sync,
MSP: Fn() -> bool + Sync,
{
// Warning: this is duplicated code from extract_word_pair_proximity_docids.rs
// TODO we need to read the new AND old settings to support changing global parameters
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data = WordDocidsSettingsExtractorsData {
tokenizer: document_tokenizer,
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
buckets: rayon::current_num_threads(),
settings_delta,
};
let datastore = ThreadLocal::new();
{
let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
settings_change_extract(
documents,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}
let mut merger = WordDocidsCaches::new();
for cache in datastore.into_iter().flat_map(RefCell::into_inner) {
merger.push(cache)?;
}
Ok(merger)
}
/// Extracts document words from a settings change.
fn extract_document_from_settings_change<SD: SettingsDelta>(
document: DocumentIdentifiers<'_>,
context: &DocumentContext<RefCell<Option<WordDocidsBalancedCaches>>>,
document_tokenizer: &DocumentTokenizer,
settings_delta: &SD,
) -> Result<()> {
let mut cached_sorter_ref = context.data.borrow_mut_or_yield();
let cached_sorter = cached_sorter_ref.as_mut().unwrap();
let doc_alloc = &context.doc_alloc;
let new_fields_ids_map = settings_delta.new_fields_ids_map();
let old_fields_ids_map = context.index.fields_ids_map_with_metadata(&context.rtxn)?;
let old_searchable = settings_delta.old_searchable_attributes().as_ref();
let new_searchable = settings_delta.new_searchable_attributes().as_ref();
let current_document = document.current(
&context.rtxn,
context.index,
old_fields_ids_map.as_fields_ids_map(),
)?;
#[derive(Debug, Clone, Copy, PartialEq)]
enum ActionToOperate {
ReindexAllFields,
// TODO improve by listing field prefixes
IndexAddedFields,
SkipDocument,
}
let mut action = ActionToOperate::SkipDocument;
// Here we do a preliminary check to determine the action to take.
// This check doesn't trigger the tokenizer as we never return
// PatternMatch::Match.
document_tokenizer.tokenize_document(
current_document,
&mut |field_name| {
let fid = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
// If the document must be reindexed, early return NoMatch to stop the scanning process.
if action == ActionToOperate::ReindexAllFields {
return Ok((fid, PatternMatch::NoMatch));
}
let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
action = match (old_field_metadata, new_field_metadata) {
// At least one field is added or removed from the exact fields => ReindexAllFields
(Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. })
if old_exact != new_exact =>
{
ActionToOperate::ReindexAllFields
}
// At least one field is removed from the searchable fields => ReindexAllFields
(Metadata { searchable: Some(_), .. }, Metadata { searchable: None, .. }) => {
ActionToOperate::ReindexAllFields
}
// At least one field is added in the searchable fields => IndexAddedFields
(Metadata { searchable: None, .. }, Metadata { searchable: Some(_), .. }) => {
// We can safely overwrite the action, because we early return when action is ReindexAllFields.
ActionToOperate::IndexAddedFields
}
_ => action,
};
Ok((fid, PatternMatch::Parent))
},
&mut |_, _, _, _| Ok(()),
)?;
// Early return when we don't need to index the document
if action == ActionToOperate::SkipDocument {
return Ok(());
}
let mut should_tokenize = |field_name: &str| {
let field_id = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
let pattern_match = match action {
ActionToOperate::ReindexAllFields => {
if old_field_metadata.is_searchable() || new_field_metadata.is_searchable() {
PatternMatch::Match
// If any old or new field is searchable then we need to iterate over all fields
// else if any field matches we need to iterate over all fields
} else if has_searchable_children(
field_name,
old_searchable.zip(new_searchable).map(|(old, new)| old.iter().chain(new)),
) {
PatternMatch::Parent
} else {
PatternMatch::NoMatch
}
}
ActionToOperate::IndexAddedFields => {
// Was not searchable but now is
if !old_field_metadata.is_searchable() && new_field_metadata.is_searchable() {
PatternMatch::Match
// If the field is now a parent of a searchable field
} else if has_searchable_children(field_name, new_searchable) {
PatternMatch::Parent
} else {
PatternMatch::NoMatch
}
}
ActionToOperate::SkipDocument => unreachable!(),
};
Ok((field_id, pattern_match))
};
let old_disabled_typos_terms = settings_delta.old_disabled_typos_terms();
let new_disabled_typos_terms = settings_delta.new_disabled_typos_terms();
let mut token_fn = |_field_name: &str, field_id, pos, word: &str| {
let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
match (old_field_metadata, new_field_metadata) {
(
Metadata { searchable: Some(_), exact: old_exact, .. },
Metadata { searchable: None, .. },
) => cached_sorter.insert_del_u32(
field_id,
pos,
word,
old_exact || old_disabled_typos_terms.is_exact(word),
// We deleted the field globally
FieldDbExtraction::Skip,
document.docid(),
doc_alloc,
),
(
Metadata { searchable: None, .. },
Metadata { searchable: Some(_), exact: new_exact, .. },
) => cached_sorter.insert_add_u32(
field_id,
pos,
word,
new_exact || new_disabled_typos_terms.is_exact(word),
FieldDbExtraction::Extract,
document.docid(),
doc_alloc,
),
(Metadata { searchable: None, .. }, Metadata { searchable: None, .. }) => {
unreachable!()
}
(Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. }) => {
cached_sorter.insert_del_u32(
field_id,
pos,
word,
old_exact || old_disabled_typos_terms.is_exact(word),
// The field has already been extracted
FieldDbExtraction::Skip,
document.docid(),
doc_alloc,
)?;
cached_sorter.insert_add_u32(
field_id,
pos,
word,
new_exact || new_disabled_typos_terms.is_exact(word),
// The field has already been extracted
FieldDbExtraction::Skip,
document.docid(),
doc_alloc,
)
}
}
};
// TODO we must tokenize twice when we change global parameters like stop words,
// the language settings, dictionary, separators, non-separators...
document_tokenizer.tokenize_document(
current_document,
&mut should_tokenize,
&mut token_fn,
)?;
Ok(())
}
}

View File

@@ -6,17 +6,24 @@ use bumpalo::Bump;
use super::match_searchable_field;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::fields_ids_map::metadata::Metadata;
use crate::proximity::ProximityPrecision::*;
use crate::proximity::{index_proximity, MAX_DISTANCE};
use crate::update::new::document::{Document, DocumentContext};
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::indexer::document_changes::{
extract, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::indexer::settings_change_extract;
use crate::update::new::indexer::settings_changes::{
DocumentsIndentifiers, SettingsChangeExtractor,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::{FieldId, GlobalFieldsIdsMap, Result, MAX_POSITION_PER_ATTRIBUTE};
use crate::update::new::{DocumentChange, DocumentIdentifiers};
use crate::update::settings::SettingsDelta;
use crate::{FieldId, PatternMatch, Result, UserError, MAX_POSITION_PER_ATTRIBUTE};
pub struct WordPairProximityDocidsExtractorData<'a> {
tokenizer: DocumentTokenizer<'a>,
@@ -116,7 +123,7 @@ impl WordPairProximityDocidsExtractor {
// and to store the docids of the documents that have a number of words in a given field
// equal to or under than MAX_COUNTED_WORDS.
fn extract_document_change(
context: &DocumentContext<RefCell<BalancedCaches>>,
context: &DocumentContext<RefCell<BalancedCaches<'_>>>,
document_tokenizer: &DocumentTokenizer,
searchable_attributes: Option<&[&str]>,
document_change: DocumentChange,
@@ -147,8 +154,12 @@ impl WordPairProximityDocidsExtractor {
process_document_tokens(
document,
document_tokenizer,
new_fields_ids_map,
&mut word_positions,
&mut |field_name| {
new_fields_ids_map
.id_with_metadata_or_insert(field_name)
.ok_or(UserError::AttributeLimitReached.into())
},
&mut |(w1, w2), prox| {
del_word_pair_proximity.push(((w1, w2), prox));
},
@@ -170,8 +181,12 @@ impl WordPairProximityDocidsExtractor {
process_document_tokens(
document,
document_tokenizer,
new_fields_ids_map,
&mut word_positions,
&mut |field_name| {
new_fields_ids_map
.id_with_metadata_or_insert(field_name)
.ok_or(UserError::AttributeLimitReached.into())
},
&mut |(w1, w2), prox| {
del_word_pair_proximity.push(((w1, w2), prox));
},
@@ -180,8 +195,12 @@ impl WordPairProximityDocidsExtractor {
process_document_tokens(
document,
document_tokenizer,
new_fields_ids_map,
&mut word_positions,
&mut |field_name| {
new_fields_ids_map
.id_with_metadata_or_insert(field_name)
.ok_or(UserError::AttributeLimitReached.into())
},
&mut |(w1, w2), prox| {
add_word_pair_proximity.push(((w1, w2), prox));
},
@@ -192,8 +211,12 @@ impl WordPairProximityDocidsExtractor {
process_document_tokens(
document,
document_tokenizer,
new_fields_ids_map,
&mut word_positions,
&mut |field_name| {
new_fields_ids_map
.id_with_metadata_or_insert(field_name)
.ok_or(UserError::AttributeLimitReached.into())
},
&mut |(w1, w2), prox| {
add_word_pair_proximity.push(((w1, w2), prox));
},
@@ -257,8 +280,8 @@ fn drain_word_positions(
fn process_document_tokens<'doc>(
document: impl Document<'doc>,
document_tokenizer: &DocumentTokenizer,
fields_ids_map: &mut GlobalFieldsIdsMap,
word_positions: &mut VecDeque<(Rc<str>, u16)>,
field_id_and_metadata: &mut impl FnMut(&str) -> Result<(FieldId, Metadata)>,
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8),
) -> Result<()> {
let mut field_id = None;
@@ -279,8 +302,248 @@ fn process_document_tokens<'doc>(
word_positions.push_back((Rc::from(word), pos));
Ok(())
};
document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?;
let mut should_tokenize = |field_name: &str| {
let (field_id, meta) = field_id_and_metadata(field_name)?;
let pattern_match = if meta.is_searchable() {
PatternMatch::Match
} else {
// TODO: should be a match on the field_name using `match_field_legacy` function,
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
PatternMatch::Parent
};
Ok((field_id, pattern_match))
};
document_tokenizer.tokenize_document(document, &mut should_tokenize, &mut token_fn)?;
drain_word_positions(word_positions, word_pair_proximity);
Ok(())
}
pub struct WordPairProximityDocidsSettingsExtractorsData<'a, SD> {
tokenizer: DocumentTokenizer<'a>,
max_memory_by_thread: Option<usize>,
buckets: usize,
settings_delta: &'a SD,
}
impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
for WordPairProximityDocidsSettingsExtractorsData<'_, SD>
{
type Data = RefCell<BalancedCaches<'extractor>>;
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
Ok(RefCell::new(BalancedCaches::new_in(
self.buckets,
self.max_memory_by_thread,
extractor_alloc,
)))
}
fn process<'doc>(
&'doc self,
documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
context: &'doc DocumentContext<Self::Data>,
) -> crate::Result<()> {
for document in documents {
let document = document?;
SettingsChangeWordPairProximityDocidsExtractors::extract_document_from_settings_change(
document,
context,
&self.tokenizer,
self.settings_delta,
)?;
}
Ok(())
}
}
pub struct SettingsChangeWordPairProximityDocidsExtractors;
impl SettingsChangeWordPairProximityDocidsExtractors {
pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
settings_delta: &SD,
documents: &'indexer DocumentsIndentifiers<'indexer>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
SD: SettingsDelta + Sync,
MSP: Fn() -> bool + Sync,
{
// Warning: this is duplicated code from extract_word_docids.rs
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data = WordPairProximityDocidsSettingsExtractorsData {
tokenizer: document_tokenizer,
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
buckets: rayon::current_num_threads(),
settings_delta,
};
let datastore = ThreadLocal::new();
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids_extraction");
let _entered = span.enter();
settings_change_extract(
documents,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}
Ok(datastore.into_iter().map(RefCell::into_inner).collect())
}
/// Extracts document words from a settings change.
fn extract_document_from_settings_change<SD: SettingsDelta>(
document: DocumentIdentifiers<'_>,
context: &DocumentContext<RefCell<BalancedCaches<'_>>>,
document_tokenizer: &DocumentTokenizer,
settings_delta: &SD,
) -> Result<()> {
let mut cached_sorter = context.data.borrow_mut_or_yield();
let doc_alloc = &context.doc_alloc;
let new_fields_ids_map = settings_delta.new_fields_ids_map();
let old_fields_ids_map = settings_delta.old_fields_ids_map();
let old_proximity_precision = *settings_delta.old_proximity_precision();
let new_proximity_precision = *settings_delta.new_proximity_precision();
let current_document = document.current(
&context.rtxn,
context.index,
old_fields_ids_map.as_fields_ids_map(),
)?;
#[derive(Debug, Clone, Copy, PartialEq)]
enum ActionToOperate {
ReindexAllFields,
SkipDocument,
}
// TODO prefix_fid delete_old_fid_based_databases
let mut action = match (old_proximity_precision, new_proximity_precision) {
(ByAttribute, ByWord) => ActionToOperate::ReindexAllFields,
(_, _) => ActionToOperate::SkipDocument,
};
// Here we do a preliminary check to determine the action to take.
// This check doesn't trigger the tokenizer as we never return
// PatternMatch::Match.
if action != ActionToOperate::ReindexAllFields {
document_tokenizer.tokenize_document(
current_document,
&mut |field_name| {
let fid = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
// If the document must be reindexed, early return NoMatch to stop the scanning process.
if action == ActionToOperate::ReindexAllFields {
return Ok((fid, PatternMatch::NoMatch));
}
let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
action = match (old_field_metadata, new_field_metadata) {
// At least one field is removed or added from the searchable fields
(
Metadata { searchable: Some(_), .. },
Metadata { searchable: None, .. },
)
| (
Metadata { searchable: None, .. },
Metadata { searchable: Some(_), .. },
) => ActionToOperate::ReindexAllFields,
_ => action,
};
Ok((fid, PatternMatch::Parent))
},
&mut |_, _, _, _| Ok(()),
)?;
}
// Early return when we don't need to index the document
if action == ActionToOperate::SkipDocument {
return Ok(());
}
let mut del_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
let mut add_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
// is a vecdequeue, and will be smol, so can stay on the heap for now
let mut word_positions: VecDeque<(Rc<str>, u16)> =
VecDeque::with_capacity(MAX_DISTANCE as usize);
process_document_tokens(
current_document,
// TODO Tokenize must be based on old settings
document_tokenizer,
&mut word_positions,
&mut |field_name| {
Ok(old_fields_ids_map.id_with_metadata(field_name).expect("All fields must exist"))
},
&mut |(w1, w2), prox| {
del_word_pair_proximity.push(((w1, w2), prox));
},
)?;
process_document_tokens(
current_document,
// TODO Tokenize must be based on new settings
document_tokenizer,
&mut word_positions,
&mut |field_name| {
Ok(new_fields_ids_map.id_with_metadata(field_name).expect("All fields must exist"))
},
&mut |(w1, w2), prox| {
add_word_pair_proximity.push(((w1, w2), prox));
},
)?;
let mut key_buffer = bumpalo::collections::Vec::new_in(doc_alloc);
del_word_pair_proximity.sort_unstable();
del_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
for ((w1, w2), prox) in del_word_pair_proximity.iter() {
let key = build_key(*prox, w1, w2, &mut key_buffer);
cached_sorter.insert_del_u32(key, document.docid())?;
}
add_word_pair_proximity.sort_unstable();
add_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
for ((w1, w2), prox) in add_word_pair_proximity.iter() {
let key = build_key(*prox, w1, w2, &mut key_buffer);
cached_sorter.insert_add_u32(key, document.docid())?;
}
Ok(())
}
}

View File

@@ -2,8 +2,12 @@ mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod tokenize_document;
pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors};
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
pub use extract_word_docids::{
SettingsChangeWordDocidsExtractors, WordDocidsCaches, WordDocidsExtractors,
};
pub use extract_word_pair_proximity_docids::{
SettingsChangeWordPairProximityDocidsExtractors, WordPairProximityDocidsExtractor,
};
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
@@ -27,3 +31,17 @@ pub fn match_searchable_field(
selection
}
/// return `true` if the provided `field_name` is a parent of at least one of the fields contained in `searchable`,
/// or if `searchable` is `None`.
fn has_searchable_children<I, A>(field_name: &str, searchable: Option<I>) -> bool
where
I: IntoIterator<Item = A>,
A: AsRef<str>,
{
searchable.is_none_or(|fields| {
fields
.into_iter()
.any(|attr| match_field_legacy(attr.as_ref(), field_name) == PatternMatch::Parent)
})
}

Some files were not shown because too many files have changed in this diff Show More