diff --git a/.github/workflows/publish-apt-brew-pkg.yml b/.github/workflows/publish-apt-brew-pkg.yml index e6adfca57..5b6994dcf 100644 --- a/.github/workflows/publish-apt-brew-pkg.yml +++ b/.github/workflows/publish-apt-brew-pkg.yml @@ -32,7 +32,7 @@ jobs: - name: Build deb package run: cargo deb -p meilisearch -o target/debian/meilisearch.deb - name: Upload debian pkg to release - uses: svenstaro/upload-release-action@2.7.0 + uses: svenstaro/upload-release-action@2.11.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/debian/meilisearch.deb diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 885a04d0d..3200e778e 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -51,7 +51,7 @@ jobs: # No need to upload binaries for dry run (cron) - name: Upload binaries to release if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.7.0 + uses: svenstaro/upload-release-action@2.11.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/meilisearch @@ -81,7 +81,7 @@ jobs: # No need to upload binaries for dry run (cron) - name: Upload binaries to release if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.7.0 + uses: svenstaro/upload-release-action@2.11.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/${{ matrix.artifact_name }} @@ -113,7 +113,7 @@ jobs: - name: Upload the binary to release # No need to upload binaries for dry run (cron) if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.7.0 + uses: svenstaro/upload-release-action@2.11.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch @@ -178,7 +178,7 @@ jobs: - name: Upload the binary to release # No need to upload binaries for dry run (cron) if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.7.0 + uses: svenstaro/upload-release-action@2.11.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml index ae6532ef9..74384e670 100644 --- a/.github/workflows/publish-docker-images.yml +++ b/.github/workflows/publish-docker-images.yml @@ -106,18 +106,20 @@ jobs: client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }' # Send notification to Swarmia to notify of a deployment: https://app.swarmia.com - - name: Send deployment to Swarmia - if: github.event_name == 'push' && success() - run: | - JSON_STRING=$( jq --null-input --compact-output \ - --arg version "${{ github.ref_name }}" \ - --arg appName "meilisearch" \ - --arg environment "production" \ - --arg commitSha "${{ github.sha }}" \ - --arg repositoryFullName "${{ github.repository }}" \ - '{"version": $version, "appName": $appName, "environment": $environment, "commitSha": $commitSha, "repositoryFullName": $repositoryFullName}' ) + # - name: 'Setup jq' + # uses: dcarbone/install-jq-action + # - name: Send deployment to Swarmia + # if: github.event_name == 'push' && success() + # run: | + # JSON_STRING=$( jq --null-input --compact-output \ + # --arg version "${{ github.ref_name }}" \ + # --arg appName "meilisearch" \ + # --arg environment "production" \ + # --arg commitSha "${{ github.sha }}" \ + # --arg repositoryFullName "${{ github.repository }}" \ + # '{"version": $version, 
"appName": $appName, "environment": $environment, "commitSha": $commitSha, "repositoryFullName": $repositoryFullName}' ) - curl -H "Authorization: ${{ secrets.SWARMIA_DEPLOYMENTS_AUTHORIZATION }}" \ - -H "Content-Type: application/json" \ - -d "$JSON_STRING" \ - https://hook.swarmia.com/deployments + # curl -H "Authorization: ${{ secrets.SWARMIA_DEPLOYMENTS_AUTHORIZATION }}" \ + # -H "Content-Type: application/json" \ + # -d "$JSON_STRING" \ + # https://hook.swarmia.com/deployments diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 6cf8bfa0f..2924a07bc 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -29,7 +29,7 @@ jobs: - name: Setup test with Rust stable uses: dtolnay/rust-toolchain@1.85 - name: Cache dependencies - uses: Swatinem/rust-cache@v2.7.8 + uses: Swatinem/rust-cache@v2.8.0 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -51,7 +51,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Cache dependencies - uses: Swatinem/rust-cache@v2.7.8 + uses: Swatinem/rust-cache@v2.8.0 - uses: dtolnay/rust-toolchain@1.85 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 @@ -155,7 +155,7 @@ jobs: apt-get install build-essential -y - uses: dtolnay/rust-toolchain@1.85 - name: Cache dependencies - uses: Swatinem/rust-cache@v2.7.8 + uses: Swatinem/rust-cache@v2.8.0 - name: Run tests in debug uses: actions-rs/cargo@v1 with: @@ -172,7 +172,7 @@ jobs: profile: minimal components: clippy - name: Cache dependencies - uses: Swatinem/rust-cache@v2.7.8 + uses: Swatinem/rust-cache@v2.8.0 - name: Run cargo clippy uses: actions-rs/cargo@v1 with: @@ -191,7 +191,7 @@ jobs: override: true components: rustfmt - name: Cache dependencies - uses: Swatinem/rust-cache@v2.7.8 + uses: Swatinem/rust-cache@v2.8.0 - name: Run cargo fmt # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file. # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate diff --git a/.gitignore b/.gitignore index 07453a58f..fc24b8306 100644 --- a/.gitignore +++ b/.gitignore @@ -18,5 +18,8 @@ ## ... unreviewed *.snap.new +# Database snapshot +crates/meilisearch/db.snapshot + # Fuzzcheck data for the facet indexing fuzz test crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e129e5600..57d52116e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -57,9 +57,17 @@ This command will be triggered to each PR as a requirement for merging it. You can set the `LINDERA_CACHE` environment variable to speed up your successive builds by up to 2 minutes. It'll store some built artifacts in the directory of your choice. -We recommend using the standard `$HOME/.cache/lindera` directory: +We recommend using the `$HOME/.cache/meili/lindera` directory: ```sh -export LINDERA_CACHE=$HOME/.cache/lindera +export LINDERA_CACHE=$HOME/.cache/meili/lindera +``` + +You can set the `MILLI_BENCH_DATASETS_PATH` environment variable to further speed up your builds. +It'll store some big files used for the benchmarks in the directory of your choice. + +We recommend using the `$HOME/.cache/meili/benches` directory: +```sh +export MILLI_BENCH_DATASETS_PATH=$HOME/.cache/meili/benches ``` Furthermore, you can improve incremental compilation by setting the `MEILI_NO_VERGEN` environment variable. 
diff --git a/Cargo.lock b/Cargo.lock index a36c568b5..ceec0a05e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,7 +47,7 @@ dependencies = [ "actix-utils", "base64 0.22.1", "bitflags 2.9.1", - "brotli 8.0.1", + "brotli", "bytes", "bytestring", "derive_more", @@ -92,6 +92,7 @@ dependencies = [ "bytestring", "cfg-if", "http 0.2.12", + "regex", "regex-lite", "serde", "tracing", @@ -219,6 +220,43 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "actix-web-lab" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33034dd88446a5deb20e42156dbfe43d07e0499345db3ae65b3f51854190531" +dependencies = [ + "actix-http", + "actix-router", + "actix-service", + "actix-utils", + "actix-web", + "ahash 0.8.12", + "arc-swap", + "bytes", + "bytestring", + "csv", + "derive_more", + "form_urlencoded", + "futures-core", + "futures-util", + "http 0.2.12", + "impl-more", + "itertools 0.14.0", + "local-channel", + "mime", + "pin-project-lite", + "regex", + "serde", + "serde_html_form", + "serde_json", + "serde_path_to_error", + "tokio", + "tokio-stream", + "tracing", + "url", +] + [[package]] name = "addr2line" version = "0.24.2" @@ -306,6 +344,12 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "allocator-api2" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78200ac3468a57d333cd0ea5dd398e25111194dcacd49208afca95c629a6311d" + [[package]] name = "anes" version = "0.1.6" @@ -386,6 +430,12 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + [[package]] name = "arrayvec" version = "0.7.6" @@ -404,7 +454,7 @@ dependencies = [ "heed", "memmap2", "nohash", - "ordered-float", + "ordered-float 4.6.0", "page_size", "rand 0.8.5", "rayon", @@ -424,6 +474,41 @@ dependencies = [ "serde_json", ] +[[package]] +name = "async-openai" +version = "0.28.1" +source = "git+https://github.com/meilisearch/async-openai?branch=better-error-handling#42d05e5f7dd7cdd46115c0855965f0b3f24754a2" +dependencies = [ + "async-openai-macros", + "backoff", + "base64 0.22.1", + "bytes", + "derive_builder 0.20.2", + "eventsource-stream", + "futures", + "rand 0.8.5", + "reqwest", + "reqwest-eventsource", + "secrecy", + "serde", + "serde_json", + "thiserror 2.0.12", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", +] + +[[package]] +name = "async-openai-macros" +version = "0.1.0" +source = "git+https://github.com/meilisearch/async-openai?branch=better-error-handling#42d05e5f7dd7cdd46115c0855965f0b3f24754a2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "async-trait" version = "0.1.88" @@ -447,6 +532,20 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "backoff" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" +dependencies = [ + "futures-core", + "getrandom 0.2.16", + "instant", + "pin-project-lite", + "rand 0.8.5", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.75" @@ -468,12 +567,6 @@ version = "0.13.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" @@ -487,12 +580,12 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2 [[package]] name = "benchmarks" -version = "1.15.0" +version = "1.16.0" dependencies = [ "anyhow", "bumpalo", "bytes", - "convert_case", + "convert_case 0.8.0", "criterion", "csv", "flate2", @@ -643,17 +736,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "brotli" -version = "6.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor 4.0.3", -] - [[package]] name = "brotli" version = "8.0.1" @@ -662,17 +744,7 @@ checksum = "9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", - "brotli-decompressor 5.0.0", -] - -[[package]] -name = "brotli-decompressor" -version = "4.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", + "brotli-decompressor", ] [[package]] @@ -698,7 +770,7 @@ dependencies = [ [[package]] name = "build-info" -version = "1.15.0" +version = "1.16.0" dependencies = [ "anyhow", "time", @@ -707,11 +779,11 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" dependencies = [ - "allocator-api2", + "allocator-api2 0.2.21", "serde", ] @@ -721,10 +793,10 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ce682bdc86c2e25ef5cd95881d9d6a1902214eddf74cf9ffea88fe1464377e8" dependencies = [ - "allocator-api2", + "allocator-api2 0.2.21", "bitpacking", "bumpalo", - "hashbrown 0.15.3", + "hashbrown 0.15.4", "serde", "serde_json", ] @@ -764,15 +836,15 @@ dependencies = [ [[package]] name = "bytecount" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" [[package]] name = "bytemuck" -version = "1.23.0" +version = "1.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c" +checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422" dependencies = [ "bytemuck_derive", ] @@ -830,18 +902,18 @@ dependencies = [ [[package]] name = "camino" -version = "1.1.9" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" +checksum = "0da45bc31171d8d6960122e222a67740df867c1dd53b4d51caa297084c185cab" dependencies = [ "serde", ] [[package]] name = "candle-core" -version = "0.8.4" +version = "0.9.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ccf5ee3532e66868516d9b315f73aec9f34ea1a37ae98514534d458915dbf1" +checksum = "a9f51e2ecf6efe9737af8f993433c839f956d2b6ed4fd2dd4a7c6d8b0fa667ff" dependencies = [ "byteorder", "candle-kernels", @@ -864,18 +936,18 @@ dependencies = [ [[package]] name = "candle-kernels" -version = "0.8.4" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a10885bd902fad1b8518ba2b22369aaed88a3d94e123533ad3ca73db33b1c8ca" +checksum = "9fcd989c2143aa754370b5bfee309e35fbd259e83d9ecf7a73d23d8508430775" dependencies = [ "bindgen_cuda", ] [[package]] name = "candle-nn" -version = "0.8.4" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1160c3b63f47d40d91110a3e1e1e566ae38edddbbf492a60b40ffc3bc1ff38" +checksum = "c1980d53280c8f9e2c6cbe1785855d7ff8010208b46e21252b978badf13ad69d" dependencies = [ "candle-core", "half", @@ -888,9 +960,9 @@ dependencies = [ [[package]] name = "candle-transformers" -version = "0.8.4" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94a0900d49f8605e0e7e6693a1f560e6271279de98e5fa369e7abf3aac245020" +checksum = "186cb80045dbe47e0b387ea6d3e906f02fb3056297080d9922984c90e90a72b0" dependencies = [ "byteorder", "candle-core", @@ -907,21 +979,38 @@ dependencies = [ [[package]] name = "cargo-platform" -version = "0.1.9" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e35af189006b9c0f00a064685c727031e3ed2d8020f7ba284d78cc2671bd36ea" +checksum = "84982c6c0ae343635a3a4ee6dedef965513735c8b183caa7289fa6e27399ebd4" dependencies = [ "serde", ] [[package]] -name = "cargo_metadata" -version = "0.19.2" +name = "cargo-util-schemas" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd5eb614ed4c27c5d706420e4320fbe3216ab31fa1c33cd8246ac36dae4479ba" +checksum = "e63d2780ac94487eb9f1fea7b0d56300abc9eb488800854ca217f102f5caccca" +dependencies = [ + "semver", + "serde", + "serde-untagged", + "serde-value", + "thiserror 1.0.69", + "toml", + "unicode-xid", + "url", +] + +[[package]] +name = "cargo_metadata" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f7835cfc6135093070e95eb2b53e5d9b5c403dc3a6be6040ee026270aa82502" dependencies = [ "camino", "cargo-platform", + "cargo-util-schemas", "semver", "serde", "serde_json", @@ -930,9 +1019,9 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.21.0" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fbd1fe9db3ebf71b89060adaf7b0504c2d6a425cf061313099547e382c2e472" +checksum = "02260d489095346e5cafd04dea8e8cb54d1d74fcd759022a9b72986ebe9a1257" dependencies = [ "serde", "toml", @@ -946,9 +1035,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.24" +version = "1.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16595d3be041c03b09d08d0858631facccee9221e579704070e6e9e4915d3bc7" +checksum = "d0fc897dc1e865cc67c0e05a836d9d3f1df3cbe442aa4a9473b18e12624a4951" dependencies = [ "jobserver", "libc", @@ -997,9 +1086,9 @@ dependencies = [ [[package]] name = "charabia" -version = "0.9.5" +version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da3b398d57d5526189869b32ac0b4f7fb436f490f47a2a19685cee634df72d2" +checksum = 
"3b01abfd2db0eb8c4e7a47ccab5d1f67993736f4e76923ed9ae281c49070645d" dependencies = [ "aho-corasick", "csv", @@ -1067,9 +1156,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" +checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" dependencies = [ "clap_builder", "clap_derive", @@ -1077,9 +1166,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" +checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" dependencies = [ "anstream", "anstyle", @@ -1089,9 +1178,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.32" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce" dependencies = [ "heck", "proc-macro2", @@ -1107,9 +1196,9 @@ checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "color-spantrace" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ddd8d5bfda1e11a501d0a7303f3bfed9aa632ebdb859be40d0fd70478ed70d5" +checksum = "b8b88ea9df13354b55bc7234ebcce36e6ef896aca2e42a15de9e10edce01b427" dependencies = [ "once_cell", "owo-colors", @@ -1182,6 +1271,15 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "convert_case" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baaaa0ecca5b51987b9423ccdc971514dd8b0bb7b4060b983d3664dad3f1f89f" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "cookie" version = "0.16.2" @@ -1193,6 +1291,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -1217,21 +1325,6 @@ dependencies = [ "libc", ] -[[package]] -name = "crc" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" -dependencies = [ - "crc-catalog", -] - -[[package]] -name = "crc-catalog" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" - [[package]] name = "crc32fast" version = "1.4.2" @@ -1243,25 +1336,22 @@ dependencies = [ [[package]] name = "criterion" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +checksum = "3bf7af66b0989381bd0be551bd7cc91912a655a58c6918420c9527b1fd8b4679" dependencies = [ "anes", "cast", "ciborium", "clap", "criterion-plot", - "is-terminal", - "itertools 0.10.5", + "itertools 0.13.0", "num-traits", - "once_cell", "oorandom", "plotters", "rayon", "regex", "serde", - "serde_derive", "serde_json", "tinytemplate", "walkdir", 
@@ -1359,9 +1449,9 @@ dependencies = [ [[package]] name = "cudarc" -version = "0.13.9" +version = "0.16.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486c221362668c63a1636cfa51463b09574433b39029326cff40864b3ba12b6e" +checksum = "f9574894139a982bf26fbb44473a9d416c015e779c51ef0fbc0789f1a1c17b25" dependencies = [ "half", "libloading", @@ -1603,7 +1693,7 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aadef696fce456c704f10186def1bdc0a40e646c9f4f18cf091477acadb731d8" dependencies = [ - "convert_case", + "convert_case 0.6.0", "proc-macro2", "quote", "syn 2.0.101", @@ -1684,7 +1774,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.15.0" +version = "1.16.0" dependencies = [ "anyhow", "big_s", @@ -1860,6 +1950,16 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "erased-serde" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e004d887f51fcb9fef17317a2f3525c887d8aa3f4f50fed920816a688284a5b7" +dependencies = [ + "serde", + "typeid", +] + [[package]] name = "errno" version = "0.3.12" @@ -1876,6 +1976,17 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +[[package]] +name = "eventsource-stream" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab" +dependencies = [ + "futures-core", + "nom", + "pin-project-lite", +] + [[package]] name = "fancy-regex" version = "0.13.0" @@ -1895,7 +2006,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "file-store" -version = "1.15.0" +version = "1.16.0" dependencies = [ "tempfile", "thiserror 2.0.12", @@ -1917,7 +2028,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.15.0" +version = "1.16.0" dependencies = [ "insta", "nom", @@ -1927,17 +2038,18 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" +checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" dependencies = [ "crc32fast", + "libz-rs-sys", "miniz_oxide", ] [[package]] name = "flatten-serde-json" -version = "1.15.0" +version = "1.16.0" dependencies = [ "criterion", "serde_json", @@ -1949,7 +2061,7 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" dependencies = [ - "spin", + "spin 0.9.8", ] [[package]] @@ -2056,6 +2168,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2076,7 +2194,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.15.0" +version = "1.16.0" dependencies = [ "arbitrary", "bumpalo", @@ -2507,16 +2625,16 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash 0.8.12", - "allocator-api2", + "allocator-api2 0.2.21", ] [[package]] name = "hashbrown" -version = "0.15.3" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" dependencies = [ - "allocator-api2", + "allocator-api2 0.2.21", "equivalent", "foldhash", "serde", @@ -2575,12 +2693,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "hermit-abi" version = "0.5.1" @@ -2706,6 +2818,7 @@ dependencies = [ "hyper", "hyper-util", "rustls", + "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls", @@ -2881,15 +2994,16 @@ dependencies = [ [[package]] name = "index-scheduler" -version = "1.15.0" +version = "1.16.0" dependencies = [ "anyhow", + "backoff", "big_s", "bincode", "bumpalo", "bumparaw-collections", "byte-unit", - "convert_case", + "convert_case 0.8.0", "crossbeam-channel", "csv", "derive_builder 0.20.2", @@ -2925,7 +3039,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.3", + "hashbrown 0.15.4", "serde", ] @@ -3008,7 +3122,7 @@ version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ - "hermit-abi 0.5.1", + "hermit-abi", "libc", "windows-sys 0.59.0", ] @@ -3116,7 +3230,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.15.0" +version = "1.16.0" dependencies = [ "criterion", "serde_json", @@ -3226,7 +3340,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-targets 0.53.0", + "windows-targets 0.52.6", +] + +[[package]] +name = "liblzma" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66352d7a8ac12d4877b6e6ea5a9b7650ee094257dc40889955bea5bc5b08c1d0" +dependencies = [ + "liblzma-sys", +] + +[[package]] +name = "liblzma-sys" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b9596486f6d60c3bbe644c0e1be1aa6ccc472ad630fe8927b456973d7cb736" +dependencies = [ + "cc", + "libc", + "pkg-config", ] [[package]] @@ -3237,9 +3371,9 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libmimalloc-sys" -version = "0.1.42" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec9d6fac27761dabcd4ee73571cdb06b7022dc99089acbe5435691edffaac0f4" +checksum = "bf88cd67e9de251c1781dbe2f641a1a3ad66eaae831b8a2c38fbdc5ddae16d4d" dependencies = [ "cc", "libc", @@ -3267,6 +3401,15 @@ dependencies = [ "redox_syscall", ] +[[package]] +name = "libz-rs-sys" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" +dependencies = [ + "zlib-rs", +] + [[package]] 
name = "libz-sys" version = "1.1.22" @@ -3281,9 +3424,9 @@ dependencies = [ [[package]] name = "lindera" -version = "0.42.4" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73b6ee48fa4ffaff0b34a0f56e8fe9e3a9f38ff097d7ffe11a189acac242efbf" +checksum = "f20720cb4206e87b6844b05c66b23301e7bb532718f200ff55bbbdfbce9b7f2b" dependencies = [ "anyhow", "bincode", @@ -3311,9 +3454,9 @@ dependencies = [ [[package]] name = "lindera-cc-cedict" -version = "0.42.4" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88fb51b5730fd63b1baf677fb19ce3f3f00616a3fbaf430f923b676dce5fab39" +checksum = "0f6ddd4aeaeaf1ce47ea5785bd6a273179d32df4af4b306d9b65a7a7f81a0e61" dependencies = [ "bincode", "byteorder", @@ -3324,9 +3467,9 @@ dependencies = [ [[package]] name = "lindera-dictionary" -version = "0.42.4" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5dafa44610860d21f66dbfee1ad387fd127824b204137b540ada4c1a744b19c" +checksum = "f9b5e417c4c6e001459e019b178f65f759be9c2cbf2d9bd803ec5d8ed0e62124" dependencies = [ "anyhow", "bincode", @@ -3352,9 +3495,9 @@ dependencies = [ [[package]] name = "lindera-ipadic" -version = "0.42.4" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273907fdf1c14a8244a370afd7ac79126337ad450d25888b1613aee17b1262a" +checksum = "c2867975f1b92d1093ccbb52c5c1664a56dfbd27a2fece0166c765ad1f043f31" dependencies = [ "bincode", "byteorder", @@ -3365,9 +3508,9 @@ dependencies = [ [[package]] name = "lindera-ipadic-neologd" -version = "0.42.4" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d4371fbd6dc3ac5cc76990ed41061c553635f67953771159e4061d7f568d14f" +checksum = "c54c4c2d3fb8b380d0ace5ae97111ca444bcfa7721966f552117d57f07d8b3b1" dependencies = [ "bincode", "byteorder", @@ -3378,9 +3521,9 @@ dependencies = [ [[package]] name = "lindera-ko-dic" -version = "0.42.4" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03f35d8e54e6d5f73e9f76da0fedfa336fa60a6d2ac7f7dcc8bcd15e338db291" +checksum = "7f495e64f62deee60d9b71dbe3fd39b69b8688c9d591842f81f94e200eb4d81f" dependencies = [ "bincode", "byteorder", @@ -3391,9 +3534,9 @@ dependencies = [ [[package]] name = "lindera-unidic" -version = "0.42.4" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "661aa828cf6af7ccd1c0c1142c087fd048af5f83776ccec6af9f9c56448bc626" +checksum = "e85ff97ce04c519fbca0f05504ea028761ccc456b1e84cf1e75fac57f9b3caf1" dependencies = [ "bincode", "byteorder", @@ -3510,9 +3653,9 @@ checksum = "4d873d7c67ce09b42110d801813efbc9364414e356be9935700d368351657487" [[package]] name = "lock_api" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" dependencies = [ "autocfg", "scopeguard", @@ -3526,11 +3669,11 @@ checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lru" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "227748d55f2f0ab4735d87fd623798cb6b664512fe979705f829c9f81c934465" +checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198" dependencies = [ - "hashbrown 
0.15.3", + "hashbrown 0.15.4", ] [[package]] @@ -3539,32 +3682,11 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" -[[package]] -name = "lzma-rs" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e" -dependencies = [ - "byteorder", - "crc", -] - -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "macro_rules_attribute" -version = "0.2.0" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a82271f7bc033d84bbca59a3ce3e4159938cb08a9c3aebbe54d215131518a13" +checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520" dependencies = [ "macro_rules_attribute-proc_macro", "paste", @@ -3572,9 +3694,9 @@ dependencies = [ [[package]] name = "macro_rules_attribute-proc_macro" -version = "0.2.0" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568" +checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" [[package]] name = "manifest-dir-macros" @@ -3602,7 +3724,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.15.0" +version = "1.16.0" dependencies = [ "insta", "md5", @@ -3613,18 +3735,20 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.15.0" +version = "1.16.0" dependencies = [ "actix-cors", "actix-http", "actix-rt", "actix-utils", "actix-web", + "actix-web-lab", "anyhow", - "async-trait", - "brotli 6.0.0", + "async-openai", + "brotli", "bstr", "build-info", + "bumpalo", "byte-unit", "bytes", "cargo_toml", @@ -3657,7 +3781,7 @@ dependencies = [ "num_cpus", "obkv", "once_cell", - "ordered-float", + "ordered-float 5.0.0", "parking_lot", "permissive-json-pointer", "pin-project-lite", @@ -3671,6 +3795,7 @@ dependencies = [ "rustls", "rustls-pemfile", "rustls-pki-types", + "secrecy", "segment", "serde", "serde_json", @@ -3700,12 +3825,12 @@ dependencies = [ "uuid", "wiremock", "yaup", - "zip 2.4.2", + "zip 4.1.0", ] [[package]] name = "meilisearch-auth" -version = "1.15.0" +version = "1.16.0" dependencies = [ "base64 0.22.1", "enum-iterator", @@ -3724,13 +3849,14 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.15.0" +version = "1.16.0" dependencies = [ "actix-web", "anyhow", "bumpalo", "bumparaw-collections", - "convert_case", + "byte-unit", + "convert_case 0.8.0", "csv", "deserr", "either", @@ -3758,7 +3884,7 @@ dependencies = [ [[package]] name = "meilitool" -version = "1.15.0" +version = "1.16.0" dependencies = [ "anyhow", "clap", @@ -3776,9 +3902,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.4" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "memmap2" @@ -3792,9 +3918,9 @@ dependencies = [ [[package]] name = "milli" -version = "1.15.0" +version = "1.16.0" dependencies = [ - "allocator-api2", + "allocator-api2 0.3.0", 
"arroy", "bbqueue", "big_s", @@ -3810,7 +3936,7 @@ dependencies = [ "candle-transformers", "charabia", "concat-arrays", - "convert_case", + "convert_case 0.8.0", "crossbeam-channel", "csv", "deserr", @@ -3823,7 +3949,7 @@ dependencies = [ "fxhash", "geoutils", "grenad", - "hashbrown 0.15.3", + "hashbrown 0.15.4", "heed", "hf-hub", "indexmap", @@ -3841,10 +3967,9 @@ dependencies = [ "mimalloc", "obkv", "once_cell", - "ordered-float", + "ordered-float 5.0.0", "rand 0.8.5", "rayon", - "rayon-par-bridge", "rhai", "roaring", "rstar", @@ -3862,7 +3987,6 @@ dependencies = [ "time", "tokenizers", "tracing", - "uell", "ureq", "url", "utoipa", @@ -3871,9 +3995,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.46" +version = "0.1.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "995942f432bbb4822a7e9c3faa87a695185b0d09273ba85f097b54f4e458f2af" +checksum = "b1791cbe101e95af5764f06f20f6760521f7158f69dbf9d6baf941ee1bf6bc40" dependencies = [ "libmimalloc-sys", ] @@ -3954,6 +4078,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e94e1e6445d314f972ff7395df2de295fe51b71821694f0b0e1e79c4f12c8577" +[[package]] +name = "no-std-compat" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c" +dependencies = [ + "spin 0.5.2", +] + [[package]] name = "nohash" version = "0.2.0" @@ -4083,11 +4216,11 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ - "hermit-abi 0.3.9", + "hermit-abi", "libc", ] @@ -4127,6 +4260,25 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +[[package]] +name = "objc2-core-foundation" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166" +dependencies = [ + "bitflags 2.9.1", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71c1c64d6120e51cd86033f67176b1cb66780c2efe34dec55176f77befd93c0a" +dependencies = [ + "libc", + "objc2-core-foundation", +] + [[package]] name = "object" version = "0.36.7" @@ -4147,6 +4299,9 @@ name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +dependencies = [ + "portable-atomic", +] [[package]] name = "once_cell_polyfill" @@ -4182,12 +4337,27 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + [[package]] name = "option-ext" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "2.10.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-float" version = "4.6.0" @@ -4197,6 +4367,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ordered-float" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2c1f9f56e534ac6a9b8a4600bdf0f530fb393b5f393e7b4d03489c3cf0c3f01" +dependencies = [ + "num-traits", +] + [[package]] name = "overload" version = "0.1.1" @@ -4221,9 +4400,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" dependencies = [ "lock_api", "parking_lot_core", @@ -4231,9 +4410,9 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.10" +version = "0.9.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" dependencies = [ "cfg-if", "libc", @@ -4291,7 +4470,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "permissive-json-pointer" -version = "1.15.0" +version = "1.16.0" dependencies = [ "big_s", "serde_json", @@ -4822,15 +5001,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "rayon-par-bridge" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6a14d8f65834aca6b0fe4cbbd7a27e639cd3efb1f2a32de9942368f1991de8" -dependencies = [ - "rayon", -] - [[package]] name = "reborrow" version = "0.5.5" @@ -4903,9 +5073,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.16" +version = "0.12.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bf597b113be201cb2269b4c39b39a804d01b99ee95a4278f0ed04e45cff1c71" +checksum = "eabf4c97d9130e2bf606614eb937e86edac8292eaa6f422f995d7e8de1eb1813" dependencies = [ "base64 0.22.1", "bytes", @@ -4918,15 +5088,14 @@ dependencies = [ "hyper", "hyper-rustls", "hyper-util", - "ipnet", "js-sys", "log", - "mime", - "once_cell", + "mime_guess", "percent-encoding", "pin-project-lite", "quinn", "rustls", + "rustls-native-certs", "rustls-pki-types", "serde", "serde_json", @@ -4946,14 +5115,32 @@ dependencies = [ "webpki-roots 1.0.0", ] +[[package]] +name = "reqwest-eventsource" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde" +dependencies = [ + "eventsource-stream", + "futures-core", + "futures-timer", + "mime", + "nom", + "pin-project-lite", + "reqwest", + "thiserror 1.0.69", +] + [[package]] name = "rhai" -version = "1.20.0" -source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4" +version = "1.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2780e813b755850e50b178931aaf94ed24f6817f46aaaf5d21c13c12d939a249" dependencies = [ "ahash 0.8.12", "bitflags 2.9.1", "instant", + "no-std-compat", "num-traits", "once_cell", "rhai_codegen", @@ -4966,7 +5153,8 @@ dependencies = [ [[package]] name = "rhai_codegen" version = 
"2.2.0" -source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5a11a05ee1ce44058fa3d5961d05194fdbe3ad6b40f904af764d81b86450e6b" dependencies = [ "proc-macro2", "quote", @@ -5107,9 +5295,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.27" +version = "0.23.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "730944ca083c1c233a75c09f199e973ca499344a2b7ba9e755c457e86fb4a321" +checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643" dependencies = [ "log", "once_cell", @@ -5120,6 +5308,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" version = "2.2.0" @@ -5181,6 +5381,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -5193,6 +5402,39 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" +[[package]] +name = "secrecy" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a" +dependencies = [ + "serde", + "zeroize", +] + +[[package]] +name = "security-framework" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" +dependencies = [ + "bitflags 2.9.1", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "segment" version = "0.2.6" @@ -5240,6 +5482,27 @@ dependencies = [ "serde", ] +[[package]] +name = "serde-untagged" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "299d9c19d7d466db4ab10addd5703e4c615dec2a5a16dbbafe191045e87ee66e" +dependencies = [ + "erased-serde", + "serde", + "typeid", +] + +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float 2.10.1", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.219" @@ -5251,6 +5514,19 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "serde_html_form" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d2de91cf02bbc07cde38891769ccd5d4f073d22a40683aa4bc7a95781aaa2c4" +dependencies = [ + "form_urlencoded", + "indexmap", + "itoa", + "ryu", + "serde", +] + [[package]] name = "serde_json" 
version = "1.0.140" @@ -5264,6 +5540,16 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59fab13f937fa393d08645bf3a84bdfe86e296747b506ada67bb15f10f218b2a" +dependencies = [ + "itoa", + "serde", +] + [[package]] name = "serde_plain" version = "1.0.2" @@ -5275,9 +5561,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" dependencies = [ "serde", ] @@ -5427,9 +5713,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.15.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" dependencies = [ "serde", ] @@ -5467,6 +5753,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "spin" version = "0.9.8" @@ -5496,9 +5788,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "static-files" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e8590e848e1c53be9258210bcd4a8f4118e08988f03a4e2d63b62e4ad9f7ced" +checksum = "f9c425c07353535ef55b45420f5a8b0a397cd9bc3d7e5236497ca0d90604aa9b" dependencies = [ "change-detection", "mime_guess", @@ -5632,15 +5924,15 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.33.1" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc858248ea01b66f19d8e8a6d55f41deaf91e9d495246fd01368d99935c6c01" +checksum = "3c3ffa3e4ff2b324a57f7aeb3c349656c7b127c3c189520251a648102a92496e" dependencies = [ - "core-foundation-sys", "libc", "memchr", "ntapi", - "rayon", + "objc2-core-foundation", + "objc2-io-kit", "windows", ] @@ -5743,26 +6035,24 @@ dependencies = [ [[package]] name = "thread_local" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ "cfg-if", - "once_cell", ] [[package]] name = "tiktoken-rs" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44075987ee2486402f0808505dd65692163d243a337fc54363d49afac41087f6" +checksum = "25563eeba904d770acf527e8b370fe9a5547bacd20ff84a0b6c3bc41288e5625" dependencies = [ "anyhow", - "base64 0.21.7", + "base64 0.22.1", "bstr", "fancy-regex", "lazy_static", - "parking_lot", "regex", "rustc-hash 1.1.0", ] @@ -5913,6 +6203,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.15" @@ -5928,9 +6229,9 @@ dependencies = [ 
[[package]] name = "toml" -version = "0.8.22" +version = "0.8.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" dependencies = [ "serde", "serde_spanned", @@ -5940,18 +6241,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.9" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.26" +version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ "indexmap", "serde", @@ -5963,9 +6264,9 @@ dependencies = [ [[package]] name = "toml_write" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" [[package]] name = "tower" @@ -5984,9 +6285,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.4" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fdb0c213ca27a9f57ab69ddb290fd80d970922355b83ae380b395d3986b8a2e" +checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ "bitflags 2.9.1", "bytes", @@ -6128,6 +6429,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typeid" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" + [[package]] name = "typenum" version = "1.18.0" @@ -6140,20 +6447,11 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" -[[package]] -name = "uell" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40de5982e28612e20330e77d81f1559b74f66caf3c7fc10b19ada4843f4b4fd7" -dependencies = [ - "bumpalo", -] - [[package]] name = "ug" -version = "0.1.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03719c61a91b51541f076dfdba45caacf750b230cefaa4b32d6f5411c3f7f437" +checksum = "90b70b37e9074642bc5f60bb23247fd072a84314ca9e71cdf8527593406a0dd3" dependencies = [ "gemm 0.18.2", "half", @@ -6172,9 +6470,9 @@ dependencies = [ [[package]] name = "ug-cuda" -version = "0.1.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50758486d7941f8b0a636ba7e29455c07071f41590beac1fd307ec893e8db69a" +checksum = "14053653d0b7fa7b21015aa9a62edc8af2f60aa6f9c54e66386ecce55f22ed29" dependencies = [ "cudarc", "half", @@ -6321,9 +6619,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "utoipa" -version = "5.3.1" +version = "5.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"435c6f69ef38c9017b4b4eea965dfb91e71e53d869e896db40d1cf2441dd75c0" +checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993" dependencies = [ "indexmap", "serde", @@ -6333,9 +6631,9 @@ dependencies = [ [[package]] name = "utoipa-gen" -version = "5.3.1" +version = "5.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77d306bc75294fd52f3e99b13ece67c02c1a2789190a6f31d32f736624326f7" +checksum = "6d79d08d92ab8af4c5e8a6da20c47ae3f61a0f1dabc1997cdf2d082b757ca08b" dependencies = [ "proc-macro2", "quote", @@ -6634,31 +6932,55 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.57.0" +version = "0.61.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +dependencies = [ + "windows-collections", + "windows-core", + "windows-future", + "windows-link", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" dependencies = [ "windows-core", - "windows-targets 0.52.6", ] [[package]] name = "windows-core" -version = "0.57.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement", "windows-interface", + "windows-link", "windows-result", - "windows-targets 0.52.6", + "windows-strings", +] + +[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core", + "windows-link", + "windows-threading", ] [[package]] name = "windows-implement" -version = "0.57.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", @@ -6667,9 +6989,9 @@ dependencies = [ [[package]] name = "windows-interface" -version = "0.57.0" +version = "0.59.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", @@ -6677,12 +6999,37 @@ dependencies = [ ] [[package]] -name = "windows-result" -version = "0.1.2" +name = "windows-link" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ - "windows-targets 0.52.6", + "windows-core", + "windows-link", +] + +[[package]] +name = "windows-result" +version = "0.3.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", ] [[package]] @@ -6736,7 +7083,7 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", + "windows_i686_gnullvm", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", @@ -6744,19 +7091,12 @@ dependencies = [ ] [[package]] -name = "windows-targets" -version = "0.53.0" +name = "windows-threading" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" dependencies = [ - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", + "windows-link", ] [[package]] @@ -6771,12 +7111,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -6789,12 +7123,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -6807,24 +7135,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -6837,12 +7153,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_i686_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -6855,12 +7165,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -6873,12 +7177,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -6891,12 +7189,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" - [[package]] name = "winnow" version = "0.7.10" @@ -6966,7 +7258,7 @@ dependencies = [ [[package]] name = "xtask" -version = "1.15.0" +version = "1.16.0" dependencies = [ "anyhow", "build-info", @@ -6987,15 +7279,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - [[package]] name = "yada" version = "0.5.1" @@ -7172,34 +7455,36 @@ dependencies = [ [[package]] name = "zip" -version = "2.4.2" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50" +checksum = "af7dcdb4229c0e79c2531a24de7726a0e980417a74fb4d030a35f535665439a0" dependencies = [ "aes", "arbitrary", "bzip2", "constant_time_eq", "crc32fast", - "crossbeam-utils", "deflate64", - "displaydoc", "flate2", "getrandom 0.3.3", "hmac", "indexmap", - "lzma-rs", + "liblzma", "memchr", "pbkdf2", "sha1", - "thiserror 2.0.12", "time", - "xz2", "zeroize", "zopfli", "zstd", ] +[[package]] +name = "zlib-rs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" + [[package]] name = "zopfli" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index ce4b806f9..3e57563b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ members = [ ] [workspace.package] -version = "1.15.0" +version = "1.16.0" authors = [ "Quentin de Quelen ", "Clément Renault ", diff --git a/crates/benchmarks/Cargo.toml b/crates/benchmarks/Cargo.toml index a2cddd554..9dccc444b 100644 --- a/crates/benchmarks/Cargo.toml +++ b/crates/benchmarks/Cargo.toml @@ -11,27 +11,27 @@ edition.workspace = true license.workspace = true [dependencies] -anyhow = "1.0.95" -bumpalo = "3.16.0" +anyhow = "1.0.98" +bumpalo = "3.18.1" csv = "1.3.1" memmap2 = "0.9.5" milli = { path = "../milli" } -mimalloc = { version = "0.1.43", default-features = false } -serde_json = 
{ version = "1.0.135", features = ["preserve_order"] } -tempfile = "3.15.0" +mimalloc = { version = "0.1.47", default-features = false } +serde_json = { version = "1.0.140", features = ["preserve_order"] } +tempfile = "3.20.0" [dev-dependencies] -criterion = { version = "0.5.1", features = ["html_reports"] } +criterion = { version = "0.6.0", features = ["html_reports"] } rand = "0.8.5" rand_chacha = "0.3.1" -roaring = "0.10.10" +roaring = "0.10.12" [build-dependencies] -anyhow = "1.0.95" -bytes = "1.9.0" -convert_case = "0.6.0" -flate2 = "1.0.35" -reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false } +anyhow = "1.0.98" +bytes = "1.10.1" +convert_case = "0.8.0" +flate2 = "1.1.2" +reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false } [features] default = ["milli/all-tokenizations"] diff --git a/crates/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs index 9199c3877..4083b69dd 100644 --- a/crates/benchmarks/benches/indexing.rs +++ b/crates/benchmarks/benches/indexing.rs @@ -11,7 +11,7 @@ use milli::heed::{EnvOpenOptions, RwTxn}; use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; -use milli::vector::EmbeddingConfigs; +use milli::vector::RuntimeEmbedders; use milli::{FilterableAttributesRule, Index}; use rand::seq::SliceRandom; use rand_chacha::rand_core::SeedableRng; @@ -65,7 +65,7 @@ fn setup_settings<'t>( let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect(); builder.set_sortable_fields(sortable_fields); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); } fn setup_index_with_settings( @@ -166,9 +166,10 @@ fn indexing_songs_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -232,9 +233,10 @@ fn reindexing_songs_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -276,9 +278,10 @@ fn reindexing_songs_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -344,9 +347,10 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -420,9 +424,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -464,9 +469,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -504,9 +510,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, 
&Progress::default(), + &Default::default(), ) .unwrap(); @@ -571,9 +578,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -637,9 +645,10 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -703,9 +712,10 @@ fn indexing_wiki(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -768,9 +778,10 @@ fn reindexing_wiki(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -812,9 +823,10 @@ fn reindexing_wiki(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -879,9 +891,10 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -955,9 +968,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -1000,9 +1014,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -1041,9 +1056,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -1107,9 +1123,10 @@ fn indexing_movies_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -1172,9 +1189,10 @@ fn reindexing_movies_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -1216,9 +1234,10 @@ fn reindexing_movies_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -1283,9 +1302,10 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -1331,9 +1351,10 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec Index { (conf.configure)(&mut builder); - builder.execute(|_| (), 
|| false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); let config = IndexerConfig::default(); @@ -125,9 +125,10 @@ pub fn base_setup(conf: &Conf) -> Index { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); diff --git a/crates/benchmarks/build.rs b/crates/benchmarks/build.rs index d7b99db37..88d8e7c5f 100644 --- a/crates/benchmarks/build.rs +++ b/crates/benchmarks/build.rs @@ -67,7 +67,7 @@ fn main() -> anyhow::Result<()> { writeln!( &mut manifest_paths_file, r#"pub const {}: &str = {:?};"#, - dataset.to_case(Case::ScreamingSnake), + dataset.to_case(Case::UpperSnake), out_file.display(), )?; diff --git a/crates/build-info/Cargo.toml b/crates/build-info/Cargo.toml index f8ede756e..ca8754b81 100644 --- a/crates/build-info/Cargo.toml +++ b/crates/build-info/Cargo.toml @@ -11,8 +11,8 @@ license.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -time = { version = "0.3.37", features = ["parsing"] } +time = { version = "0.3.41", features = ["parsing"] } [build-dependencies] -anyhow = "1.0.95" -vergen-git2 = "1.0.2" +anyhow = "1.0.98" +vergen-git2 = "1.0.7" diff --git a/crates/dump/Cargo.toml b/crates/dump/Cargo.toml index 5c427916c..4b8a49aa0 100644 --- a/crates/dump/Cargo.toml +++ b/crates/dump/Cargo.toml @@ -11,21 +11,21 @@ readme.workspace = true license.workspace = true [dependencies] -anyhow = "1.0.95" -flate2 = "1.0.35" -http = "1.2.0" +anyhow = "1.0.98" +flate2 = "1.1.2" +http = "1.3.1" meilisearch-types = { path = "../meilisearch-types" } -once_cell = "1.20.2" +once_cell = "1.21.3" regex = "1.11.1" -roaring = { version = "0.10.10", features = ["serde"] } -serde = { version = "1.0.217", features = ["derive"] } -serde_json = { version = "1.0.135", features = ["preserve_order"] } -tar = "0.4.43" -tempfile = "3.15.0" -thiserror = "2.0.9" -time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] } +roaring = { version = "0.10.12", features = ["serde"] } +serde = { version = "1.0.219", features = ["derive"] } +serde_json = { version = "1.0.140", features = ["preserve_order"] } +tar = "0.4.44" +tempfile = "3.20.0" +thiserror = "2.0.12" +time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] } tracing = "0.1.41" -uuid = { version = "1.11.0", features = ["serde", "v4"] } +uuid = { version = "1.17.0", features = ["serde", "v4"] } [dev-dependencies] big_s = "1.0.2" diff --git a/crates/dump/src/lib.rs b/crates/dump/src/lib.rs index 95d75700e..81ba40944 100644 --- a/crates/dump/src/lib.rs +++ b/crates/dump/src/lib.rs @@ -1,12 +1,17 @@ #![allow(clippy::type_complexity)] #![allow(clippy::wrong_self_convention)] +use std::collections::BTreeMap; + use meilisearch_types::batches::BatchId; +use meilisearch_types::byte_unit::Byte; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::Key; use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::settings::Unchecked; -use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId}; +use meilisearch_types::tasks::{ + Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, +}; use meilisearch_types::InstanceUid; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; @@ -141,6 +146,12 @@ pub enum 
KindDump { instance_uid: Option, }, SnapshotCreation, + Export { + url: String, + api_key: Option, + payload_size: Option, + indexes: BTreeMap, + }, UpgradeDatabase { from: (u32, u32, u32), }, @@ -213,6 +224,15 @@ impl From for KindDump { KindDump::DumpCreation { keys, instance_uid } } KindWithContent::SnapshotCreation => KindDump::SnapshotCreation, + KindWithContent::Export { url, api_key, payload_size, indexes } => KindDump::Export { + url, + api_key, + payload_size, + indexes: indexes + .into_iter() + .map(|(pattern, settings)| (pattern.to_string(), settings)) + .collect(), + }, KindWithContent::UpgradeDatabase { from: version } => { KindDump::UpgradeDatabase { from: version } } @@ -305,6 +325,7 @@ pub(crate) mod test { localized_attributes: Setting::NotSet, facet_search: Setting::NotSet, prefix_search: Setting::NotSet, + chat: Setting::NotSet, _kind: std::marker::PhantomData, }; settings.check() @@ -328,6 +349,7 @@ pub(crate) mod test { write_channel_congestion: None, internal_database_sizes: Default::default(), }, + embedder_stats: Default::default(), enqueued_at: Some(BatchEnqueuedAt { earliest: datetime!(2022-11-11 0:00 UTC), oldest: datetime!(2022-11-11 0:00 UTC), diff --git a/crates/dump/src/reader/compat/v5_to_v6.rs b/crates/dump/src/reader/compat/v5_to_v6.rs index b4a4fcb24..f7bda81c6 100644 --- a/crates/dump/src/reader/compat/v5_to_v6.rs +++ b/crates/dump/src/reader/compat/v5_to_v6.rs @@ -1,3 +1,4 @@ +use std::num::NonZeroUsize; use std::str::FromStr; use super::v4_to_v5::{CompatIndexV4ToV5, CompatV4ToV5}; @@ -388,7 +389,13 @@ impl From> for v6::Settings { }, pagination: match settings.pagination { v5::Setting::Set(pagination) => v6::Setting::Set(v6::PaginationSettings { - max_total_hits: pagination.max_total_hits.into(), + max_total_hits: match pagination.max_total_hits { + v5::Setting::Set(max_total_hits) => v6::Setting::Set( + max_total_hits.try_into().unwrap_or(NonZeroUsize::new(1).unwrap()), + ), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + }, }), v5::Setting::Reset => v6::Setting::Reset, v5::Setting::NotSet => v6::Setting::NotSet, @@ -398,6 +405,7 @@ impl From> for v6::Settings { search_cutoff_ms: v6::Setting::NotSet, facet_search: v6::Setting::NotSet, prefix_search: v6::Setting::NotSet, + chat: v6::Setting::NotSet, _kind: std::marker::PhantomData, } } diff --git a/crates/dump/src/reader/mod.rs b/crates/dump/src/reader/mod.rs index 2b4440ab7..23e7eec9e 100644 --- a/crates/dump/src/reader/mod.rs +++ b/crates/dump/src/reader/mod.rs @@ -116,6 +116,15 @@ impl DumpReader { } } + pub fn chat_completions_settings( + &mut self, + ) -> Result> + '_>> { + match self { + DumpReader::Current(current) => current.chat_completions_settings(), + DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())), + } + } + pub fn features(&self) -> Result> { match self { DumpReader::Current(current) => Ok(current.features()), diff --git a/crates/dump/src/reader/v6/mod.rs b/crates/dump/src/reader/v6/mod.rs index 0b4ba5bdd..449a7e5fe 100644 --- a/crates/dump/src/reader/v6/mod.rs +++ b/crates/dump/src/reader/v6/mod.rs @@ -1,3 +1,4 @@ +use std::ffi::OsStr; use std::fs::{self, File}; use std::io::{BufRead, BufReader, ErrorKind}; use std::path::Path; @@ -21,6 +22,7 @@ pub type Unchecked = meilisearch_types::settings::Unchecked; pub type Task = crate::TaskDump; pub type Batch = meilisearch_types::batches::Batch; pub type Key = meilisearch_types::keys::Key; +pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings; pub type 
RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures; pub type Network = meilisearch_types::features::Network; @@ -192,6 +194,34 @@ impl V6Reader { ) } + pub fn chat_completions_settings( + &mut self, + ) -> Result> + '_>> { + let entries = match fs::read_dir(self.dump.path().join("chat-completions-settings")) { + Ok(entries) => entries, + Err(e) if e.kind() == ErrorKind::NotFound => return Ok(Box::new(std::iter::empty())), + Err(e) => return Err(e.into()), + }; + Ok(Box::new( + entries + .map(|entry| -> Result> { + let entry = entry?; + let file_name = entry.file_name(); + let path = Path::new(&file_name); + if entry.file_type()?.is_file() && path.extension() == Some(OsStr::new("json")) + { + let name = path.file_stem().unwrap().to_str().unwrap().to_string(); + let file = File::open(entry.path())?; + let settings = serde_json::from_reader(file)?; + Ok(Some((name, settings))) + } else { + Ok(None) + } + }) + .filter_map(|entry| entry.transpose()), + )) + } + pub fn features(&self) -> Option { self.features } diff --git a/crates/dump/src/writer.rs b/crates/dump/src/writer.rs index 63b006b5c..9f828595a 100644 --- a/crates/dump/src/writer.rs +++ b/crates/dump/src/writer.rs @@ -5,7 +5,7 @@ use std::path::PathBuf; use flate2::write::GzEncoder; use flate2::Compression; use meilisearch_types::batches::Batch; -use meilisearch_types::features::{Network, RuntimeTogglableFeatures}; +use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures}; use meilisearch_types::keys::Key; use meilisearch_types::settings::{Checked, Settings}; use serde_json::{Map, Value}; @@ -51,6 +51,10 @@ impl DumpWriter { KeyWriter::new(self.dir.path().to_path_buf()) } + pub fn create_chat_completions_settings(&self) -> Result { + ChatCompletionsSettingsWriter::new(self.dir.path().join("chat-completions-settings")) + } + pub fn create_tasks_queue(&self) -> Result { TaskWriter::new(self.dir.path().join("tasks")) } @@ -104,6 +108,24 @@ impl KeyWriter { } } +pub struct ChatCompletionsSettingsWriter { + path: PathBuf, +} + +impl ChatCompletionsSettingsWriter { + pub(crate) fn new(path: PathBuf) -> Result { + std::fs::create_dir(&path)?; + Ok(ChatCompletionsSettingsWriter { path }) + } + + pub fn push_settings(&mut self, name: &str, settings: &ChatCompletionSettings) -> Result<()> { + let mut settings_file = File::create(self.path.join(name).with_extension("json"))?; + serde_json::to_writer(&mut settings_file, &settings)?; + settings_file.flush()?; + Ok(()) + } +} + pub struct TaskWriter { queue: BufWriter, update_files: PathBuf, diff --git a/crates/file-store/Cargo.toml b/crates/file-store/Cargo.toml index 66ea65336..864b9caff 100644 --- a/crates/file-store/Cargo.toml +++ b/crates/file-store/Cargo.toml @@ -11,7 +11,7 @@ edition.workspace = true license.workspace = true [dependencies] -tempfile = "3.15.0" -thiserror = "2.0.9" +tempfile = "3.20.0" +thiserror = "2.0.12" tracing = "0.1.41" -uuid = { version = "1.11.0", features = ["serde", "v4"] } +uuid = { version = "1.17.0", features = ["serde", "v4"] } diff --git a/crates/filter-parser/Cargo.toml b/crates/filter-parser/Cargo.toml index 2657315a4..6eeb0794b 100644 --- a/crates/filter-parser/Cargo.toml +++ b/crates/filter-parser/Cargo.toml @@ -14,7 +14,7 @@ license.workspace = true [dependencies] nom = "7.1.3" nom_locate = "4.2.0" -unescaper = "0.1.5" +unescaper = "0.1.6" [dev-dependencies] # fixed version due to format breakages in v1.40 diff --git a/crates/flatten-serde-json/Cargo.toml 
b/crates/flatten-serde-json/Cargo.toml index 7b498ec4f..27a2c089f 100644 --- a/crates/flatten-serde-json/Cargo.toml +++ b/crates/flatten-serde-json/Cargo.toml @@ -16,7 +16,7 @@ license.workspace = true serde_json = "1.0" [dev-dependencies] -criterion = { version = "0.5.1", features = ["html_reports"] } +criterion = { version = "0.6.0", features = ["html_reports"] } [[bench]] name = "benchmarks" diff --git a/crates/fuzzers/Cargo.toml b/crates/fuzzers/Cargo.toml index a838350ba..6daf95904 100644 --- a/crates/fuzzers/Cargo.toml +++ b/crates/fuzzers/Cargo.toml @@ -12,11 +12,11 @@ license.workspace = true [dependencies] arbitrary = { version = "1.4.1", features = ["derive"] } -bumpalo = "3.16.0" -clap = { version = "4.5.24", features = ["derive"] } -either = "1.13.0" +bumpalo = "3.18.1" +clap = { version = "4.5.40", features = ["derive"] } +either = "1.15.0" fastrand = "2.3.0" milli = { path = "../milli" } -serde = { version = "1.0.217", features = ["derive"] } -serde_json = { version = "1.0.135", features = ["preserve_order"] } -tempfile = "3.15.0" +serde = { version = "1.0.219", features = ["derive"] } +serde_json = { version = "1.0.140", features = ["preserve_order"] } +tempfile = "3.20.0" diff --git a/crates/fuzzers/src/bin/fuzz-indexing.rs b/crates/fuzzers/src/bin/fuzz-indexing.rs index 4df989b51..ec1f96fd5 100644 --- a/crates/fuzzers/src/bin/fuzz-indexing.rs +++ b/crates/fuzzers/src/bin/fuzz-indexing.rs @@ -13,7 +13,7 @@ use milli::heed::EnvOpenOptions; use milli::progress::Progress; use milli::update::new::indexer; use milli::update::IndexerConfig; -use milli::vector::EmbeddingConfigs; +use milli::vector::RuntimeEmbedders; use milli::Index; use serde_json::Value; use tempfile::TempDir; @@ -89,7 +89,7 @@ fn main() { let mut new_fields_ids_map = db_fields_ids_map.clone(); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); let mut operations = Vec::new(); @@ -144,6 +144,7 @@ fn main() { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); diff --git a/crates/index-scheduler/Cargo.toml b/crates/index-scheduler/Cargo.toml index b4f187729..de0d01935 100644 --- a/crates/index-scheduler/Cargo.toml +++ b/crates/index-scheduler/Cargo.toml @@ -11,31 +11,31 @@ edition.workspace = true license.workspace = true [dependencies] -anyhow = "1.0.95" +anyhow = "1.0.98" bincode = "1.3.3" byte-unit = "5.1.6" -bumpalo = "3.16.0" +bumpalo = "3.18.1" bumparaw-collections = "0.1.4" -convert_case = "0.6.0" +convert_case = "0.8.0" csv = "1.3.1" derive_builder = "0.20.2" dump = { path = "../dump" } enum-iterator = "2.1.0" file-store = { path = "../file-store" } -flate2 = "1.0.35" -indexmap = "2.7.0" +flate2 = "1.1.2" +indexmap = "2.9.0" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } memmap2 = "0.9.5" page_size = "0.6.0" rayon = "1.10.0" -roaring = { version = "0.10.10", features = ["serde"] } -serde = { version = "1.0.217", features = ["derive"] } -serde_json = { version = "1.0.138", features = ["preserve_order"] } +roaring = { version = "0.10.12", features = ["serde"] } +serde = { version = "1.0.219", features = ["derive"] } +serde_json = { version = "1.0.140", features = ["preserve_order"] } synchronoise = "1.0.1" -tempfile = "3.15.0" -thiserror = "2.0.9" -time = { version = "0.3.37", features = [ +tempfile = "3.20.0" +thiserror = "2.0.12" +time = { version = "0.3.41", features = [ "serde-well-known", 
"formatting", "parsing", @@ -43,7 +43,8 @@ time = { version = "0.3.37", features = [ ] } tracing = "0.1.41" ureq = "2.12.1" -uuid = { version = "1.11.0", features = ["serde", "v4"] } +uuid = { version = "1.17.0", features = ["serde", "v4"] } +backoff = "0.4.0" [dev-dependencies] big_s = "1.0.2" diff --git a/crates/index-scheduler/src/dump.rs b/crates/index-scheduler/src/dump.rs index ca26e50c8..1e681c8e8 100644 --- a/crates/index-scheduler/src/dump.rs +++ b/crates/index-scheduler/src/dump.rs @@ -4,6 +4,7 @@ use std::io; use dump::{KindDump, TaskDump, UpdateFile}; use meilisearch_types::batches::{Batch, BatchId}; use meilisearch_types::heed::RwTxn; +use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::milli; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use roaring::RoaringBitmap; @@ -211,6 +212,23 @@ impl<'a> Dump<'a> { KindWithContent::DumpCreation { keys, instance_uid } } KindDump::SnapshotCreation => KindWithContent::SnapshotCreation, + KindDump::Export { url, api_key, payload_size, indexes } => { + KindWithContent::Export { + url, + api_key, + payload_size, + indexes: indexes + .into_iter() + .map(|(pattern, settings)| { + Ok(( + IndexUidPattern::try_from(pattern) + .map_err(|_| Error::CorruptedDump)?, + settings, + )) + }) + .collect::>()?, + } + } KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from }, }, }; diff --git a/crates/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs index cb798b385..60669ff2d 100644 --- a/crates/index-scheduler/src/error.rs +++ b/crates/index-scheduler/src/error.rs @@ -151,6 +151,10 @@ pub enum Error { CorruptedTaskQueue, #[error(transparent)] DatabaseUpgrade(Box), + #[error(transparent)] + Export(Box), + #[error("Failed to export documents to remote server {code} ({type}): {message} <{link}>")] + FromRemoteWhenExporting { message: String, code: String, r#type: String, link: String }, #[error("Failed to rollback for index `{index}`: {rollback_outcome} ")] RollbackFailed { index: String, rollback_outcome: RollbackOutcome }, #[error(transparent)] @@ -212,6 +216,7 @@ impl Error { | Error::BatchNotFound(_) | Error::TaskDeletionWithEmptyQuery | Error::TaskCancelationWithEmptyQuery + | Error::FromRemoteWhenExporting { .. } | Error::AbortedTask | Error::Dump(_) | Error::Heed(_) @@ -221,6 +226,7 @@ impl Error { | Error::IoError(_) | Error::Persist(_) | Error::FeatureNotEnabled(_) + | Error::Export(_) | Error::Anyhow(_) => true, Error::CreateBatch(_) | Error::CorruptedTaskQueue @@ -282,6 +288,7 @@ impl ErrorCode for Error { Error::Dump(e) => e.error_code(), Error::Milli { error, .. } => error.error_code(), Error::ProcessBatchPanicked(_) => Code::Internal, + Error::FromRemoteWhenExporting { .. } => Code::Internal, Error::Heed(e) => e.error_code(), Error::HeedTransaction(e) => e.error_code(), Error::FileStore(e) => e.error_code(), @@ -294,6 +301,7 @@ impl ErrorCode for Error { Error::CorruptedTaskQueue => Code::Internal, Error::CorruptedDump => Code::Internal, Error::DatabaseUpgrade(_) => Code::Internal, + Error::Export(_) => Code::Internal, Error::RollbackFailed { .. } => Code::Internal, Error::UnrecoverableError(_) => Code::Internal, Error::IndexSchedulerVersionMismatch { .. 
} => Code::Internal, diff --git a/crates/index-scheduler/src/features.rs b/crates/index-scheduler/src/features.rs index 109e6b867..b52a659a6 100644 --- a/crates/index-scheduler/src/features.rs +++ b/crates/index-scheduler/src/features.rs @@ -131,6 +131,32 @@ impl RoFeatures { .into()) } } + + pub fn check_chat_completions(&self, disabled_action: &'static str) -> Result<()> { + if self.runtime.chat_completions { + Ok(()) + } else { + Err(FeatureNotEnabledError { + disabled_action, + feature: "chat completions", + issue_link: "https://github.com/orgs/meilisearch/discussions/835", + } + .into()) + } + } + + pub fn check_multimodal(&self, disabled_action: &'static str) -> Result<()> { + if self.runtime.multimodal { + Ok(()) + } else { + Err(FeatureNotEnabledError { + disabled_action, + feature: "multimodal", + issue_link: "https://github.com/orgs/meilisearch/discussions/846", + } + .into()) + } + } } impl FeatureData { diff --git a/crates/index-scheduler/src/insta_snapshot.rs b/crates/index-scheduler/src/insta_snapshot.rs index 89e615132..0cbbb2514 100644 --- a/crates/index-scheduler/src/insta_snapshot.rs +++ b/crates/index-scheduler/src/insta_snapshot.rs @@ -34,6 +34,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { planned_failures: _, run_loop_iteration: _, embedders: _, + chat_settings: _, } = scheduler; let rtxn = env.read_txn().unwrap(); @@ -288,6 +289,9 @@ fn snapshot_details(d: &Details) -> String { Details::IndexSwap { swaps } => { format!("{{ swaps: {swaps:?} }}") } + Details::Export { url, api_key, payload_size, indexes } => { + format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}") + } Details::UpgradeDatabase { from, to } => { format!("{{ from: {from:?}, to: {to:?} }}") } @@ -342,6 +346,7 @@ pub fn snapshot_batch(batch: &Batch) -> String { uid, details, stats, + embedder_stats, started_at, finished_at, progress: _, @@ -365,6 +370,12 @@ pub fn snapshot_batch(batch: &Batch) -> String { snap.push_str(&format!("uid: {uid}, ")); snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap())); snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap())); + if !embedder_stats.skip_serializing() { + snap.push_str(&format!( + "embedder stats: {}, ", + serde_json::to_string(&embedder_stats).unwrap() + )); + } snap.push_str(&format!("stop reason: {}, ", serde_json::to_string(&stop_reason).unwrap())); snap.push('}'); snap diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index 4f1109348..b2f27d66b 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -51,16 +51,21 @@ pub use features::RoFeatures; use flate2::bufread::GzEncoder; use flate2::Compression; use meilisearch_types::batches::Batch; -use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; +use meilisearch_types::features::{ + ChatCompletionSettings, InstanceTogglableFeatures, Network, RuntimeTogglableFeatures, +}; use meilisearch_types::heed::byteorder::BE; -use meilisearch_types::heed::types::I128; -use meilisearch_types::heed::{self, Env, RoTxn, WithoutTls}; -use meilisearch_types::milli::index::IndexEmbeddingConfig; +use meilisearch_types::heed::types::{DecodeIgnore, SerdeJson, Str, I128}; +use meilisearch_types::heed::{self, Database, Env, RoTxn, WithoutTls}; use meilisearch_types::milli::update::IndexerConfig; -use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; +use 
meilisearch_types::milli::vector::json_template::JsonTemplate;
+use meilisearch_types::milli::vector::{
+    Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
+};
 use meilisearch_types::milli::{self, Index};
 use meilisearch_types::task_view::TaskView;
 use meilisearch_types::tasks::{KindWithContent, Task};
+use milli::vector::db::IndexEmbeddingConfig;
 use processing::ProcessingTasks;
 pub use queue::Query;
 use queue::Queue;
@@ -75,6 +80,7 @@ use crate::utils::clamp_to_page_size;
 pub(crate) type BEI128 = I128<BE>;
 
 const TASK_SCHEDULER_SIZE_THRESHOLD_PERCENT_INT: u64 = 40;
+const CHAT_SETTINGS_DB_NAME: &str = "chat-settings";
 
 #[derive(Debug)]
 pub struct IndexSchedulerOptions {
@@ -131,6 +137,8 @@ pub struct IndexSchedulerOptions {
     ///
     /// 0 disables the cache.
     pub embedding_cache_cap: usize,
+    /// Snapshot compaction status.
+    pub experimental_no_snapshot_compaction: bool,
 }
 
 /// Structure which holds meilisearch's indexes and schedules the tasks
@@ -151,6 +159,9 @@ pub struct IndexScheduler {
     /// In charge of fetching and setting the status of experimental features.
     features: features::FeatureData,
 
+    /// Stores the custom chat prompts and other settings of the indexes.
+    pub(crate) chat_settings: Database<Str, SerdeJson<ChatCompletionSettings>>,
+
     /// Everything related to the processing of the tasks
     pub scheduler: scheduler::Scheduler,
@@ -209,11 +220,16 @@ impl IndexScheduler {
             #[cfg(test)]
             run_loop_iteration: self.run_loop_iteration.clone(),
             features: self.features.clone(),
+            chat_settings: self.chat_settings,
         }
     }
 
     pub(crate) const fn nb_db() -> u32 {
-        Versioning::nb_db() + Queue::nb_db() + IndexMapper::nb_db() + features::FeatureData::nb_db()
+        Versioning::nb_db()
+            + Queue::nb_db()
+            + IndexMapper::nb_db()
+            + features::FeatureData::nb_db()
+            + 1 // chat-settings
     }
 
     /// Create an index scheduler and start its run loop.
@@ -267,6 +283,7 @@ impl IndexScheduler {
         let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
         let queue = Queue::new(&env, &mut wtxn, &options)?;
         let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
+        let chat_settings = env.create_database(&mut wtxn, Some(CHAT_SETTINGS_DB_NAME))?;
         wtxn.commit()?;
 
         // allow unreachable_code to get rid of the warning in the case of a test build.
@@ -290,12 +307,17 @@ impl IndexScheduler {
             #[cfg(test)]
             run_loop_iteration: Arc::new(RwLock::new(0)),
             features,
+            chat_settings,
         };
 
         this.run();
         Ok(this)
     }
 
+    fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
+        self.env.read_txn().map_err(|e| e.into())
+    }
+
     /// Return `Ok(())` if the index scheduler is able to access one of its databases.
     pub fn health(&self) -> Result<()> {
         let rtxn = self.env.read_txn()?;
@@ -372,15 +394,16 @@ impl IndexScheduler {
         }
     }
 
-    pub fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
-        self.env.read_txn().map_err(|e| e.into())
-    }
-
     /// Start the run loop for the given index scheduler.
     ///
     /// This function will execute in a different thread and must be called
     /// only once per index scheduler.
     fn run(&self) {
+        // If the number of batched tasks is 0, we don't need to run the scheduler at all.
+        // It will never be able to process any tasks.
+        if self.scheduler.max_number_of_batched_tasks == 0 {
+            return;
+        }
         let run = self.private_clone();
         std::thread::Builder::new()
             .name(String::from("scheduler"))
@@ -488,7 +511,7 @@ impl IndexScheduler {
 
     /// Returns the total number of indexes available for the specified filter,
     /// and a `Vec` of each index_uid with its stats.
-    pub fn get_paginated_indexes_stats(
+    pub fn paginated_indexes_stats(
         &self,
         filters: &meilisearch_auth::AuthFilter,
         from: usize,
         limit: usize,
@@ -529,6 +552,24 @@ impl IndexScheduler {
         ret.map(|ret| (total, ret))
     }
 
+    /// Returns the total number of chat workspaces (no filter is applied yet),
+    /// and a `Vec` of the workspace uids.
+    pub fn paginated_chat_workspace_uids(
+        &self,
+        from: usize,
+        limit: usize,
+    ) -> Result<(usize, Vec<String>)> {
+        let rtxn = self.read_txn()?;
+        let total = self.chat_settings.len(&rtxn)?;
+        let mut iter = self.chat_settings.iter(&rtxn)?.skip(from);
+        iter.by_ref()
+            .take(limit)
+            .map(|ret| ret.map_err(Error::from))
+            .map(|ret| ret.map(|(uid, _)| uid.to_string()))
+            .collect::<Result<Vec<_>, Error>>()
+            .map(|ret| (total as usize, ret))
+    }
+
     /// The returned structure contains:
     /// 1. The name of the property being observed can be `statuses`, `types`, or `indexes`.
     /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example.
@@ -813,29 +854,42 @@ impl IndexScheduler {
         &self,
         index_uid: String,
         embedding_configs: Vec<IndexEmbeddingConfig>,
-    ) -> Result<EmbeddingConfigs> {
+    ) -> Result<RuntimeEmbedders> {
         let res: Result<_> = embedding_configs
             .into_iter()
             .map(
                 |IndexEmbeddingConfig {
                      name,
                      config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
-                     ..
-                 }| {
-                    let prompt = Arc::new(
-                        prompt
-                            .try_into()
-                            .map_err(meilisearch_types::milli::Error::from)
-                            .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
-                    );
+                     fragments,
+                 }|
+                 -> Result<(String, Arc<RuntimeEmbedder>)> {
+                    let document_template = prompt
+                        .try_into()
+                        .map_err(meilisearch_types::milli::Error::from)
+                        .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
+
+                    let fragments = fragments
+                        .into_inner()
+                        .into_iter()
+                        .map(|fragment| {
+                            let value = embedder_options.fragment(&fragment.name).unwrap();
+                            let template = JsonTemplate::new(value.clone()).unwrap();
+                            RuntimeFragment { name: fragment.name, id: fragment.id, template }
+                        })
+                        .collect();
                     // optimistically return existing embedder
                     {
                         let embedders = self.embedders.read().unwrap();
                         if let Some(embedder) = embedders.get(&embedder_options) {
-                            return Ok((
-                                name,
-                                (embedder.clone(), prompt, quantized.unwrap_or_default()),
+                            let runtime = Arc::new(RuntimeEmbedder::new(
+                                embedder.clone(),
+                                document_template,
+                                fragments,
+                                quantized.unwrap_or_default(),
                             ));
+
+                            return Ok((name, runtime));
                         }
                     }
@@ -851,11 +905,44 @@ impl IndexScheduler {
                         let mut embedders = self.embedders.write().unwrap();
                         embedders.insert(embedder_options, embedder.clone());
                     }
-                    Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
+
+                    let runtime = Arc::new(RuntimeEmbedder::new(
+                        embedder.clone(),
+                        document_template,
+                        fragments,
+                        quantized.unwrap_or_default(),
+                    ));
+
+                    Ok((name, runtime))
                 },
             )
             .collect();
-        res.map(EmbeddingConfigs::new)
+        res.map(RuntimeEmbedders::new)
+    }
+
+    pub fn chat_settings(&self, uid: &str) -> Result<Option<ChatCompletionSettings>> {
+        let rtxn = self.env.read_txn()?;
+        self.chat_settings.get(&rtxn, uid).map_err(Into::into)
+    }
+
+    /// Returns `true` if the chat workspace exists.
+    pub fn chat_workspace_exists(&self, name: &str) -> Result<bool> {
+        let rtxn = self.env.read_txn()?;
+        Ok(self.chat_settings.remap_data_type::<DecodeIgnore>().get(&rtxn, name)?.is_some())
+    }
+
+    pub fn put_chat_settings(&self, uid: &str, settings: &ChatCompletionSettings) -> Result<()> {
+        let mut wtxn = self.env.write_txn()?;
+        self.chat_settings.put(&mut wtxn, uid, settings)?;
+        wtxn.commit()?;
+        Ok(())
+    }
+
+    pub fn delete_chat_settings(&self, uid: &str) -> Result<bool> {
+        let mut wtxn = self.env.write_txn()?;
+        let deleted = self.chat_settings.delete(&mut wtxn, uid)?;
+        wtxn.commit()?;
+        Ok(deleted)
+    }
 }
diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs
index f23b811e5..fdd8e42ef 100644
--- a/crates/index-scheduler/src/processing.rs
+++ b/crates/index-scheduler/src/processing.rs
@@ -103,6 +103,7 @@ make_enum_progress! {
     pub enum DumpCreationProgress {
         StartTheDumpCreation,
         DumpTheApiKeys,
+        DumpTheChatCompletionSettings,
         DumpTheTasks,
         DumpTheBatches,
         DumpTheIndexes,
@@ -175,8 +176,17 @@ make_enum_progress! {
     }
 }
 
+make_enum_progress! {
+    pub enum Export {
+        EnsuringCorrectnessOfTheTarget,
+        ExportingTheSettings,
+        ExportingTheDocuments,
+    }
+}
+
 make_atomic_progress!(Task alias AtomicTaskStep => "task" );
 make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
+make_atomic_progress!(Index alias AtomicIndexStep => "index" );
 make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
 make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
diff --git a/crates/index-scheduler/src/queue/batches.rs b/crates/index-scheduler/src/queue/batches.rs
index b5b63e1d7..b96f65836 100644
--- a/crates/index-scheduler/src/queue/batches.rs
+++ b/crates/index-scheduler/src/queue/batches.rs
@@ -179,6 +179,7 @@ impl BatchQueue {
             progress: None,
             details: batch.details,
             stats: batch.stats,
+            embedder_stats: batch.embedder_stats.as_ref().into(),
             started_at: batch.started_at,
             finished_at: batch.finished_at,
             enqueued_at: batch.enqueued_at,
diff --git a/crates/index-scheduler/src/queue/batches_test.rs b/crates/index-scheduler/src/queue/batches_test.rs
index 73567ef78..782acb4b1 100644
--- a/crates/index-scheduler/src/queue/batches_test.rs
+++ b/crates/index-scheduler/src/queue/batches_test.rs
@@ -127,7 +127,7 @@ fn query_batches_simple() {
             "startedAt": "1970-01-01T00:00:00Z",
             "finishedAt": null,
             "enqueuedAt": null,
-            "stopReason": "task with id 0 of type `indexCreation` cannot be batched"
+            "stopReason": "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task."
} "###); diff --git a/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_canceled_by/start.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_canceled_by/start.snap index 2dafc2719..48d1ccaab 100644 --- a/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_canceled_by/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_canceled_by/start.snap @@ -48,8 +48,8 @@ catto: { number_of_documents: 0, field_distribution: {} } [timestamp] [1,2,3,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {"primaryKey":"sheep","matchedTasks":3,"canceledTasks":2,"originalFilter":"test_query","swaps":[{"indexes":["catto","doggo"]}]}, stats: {"totalNbTasks":3,"status":{"succeeded":1,"canceled":2},"types":{"indexCreation":1,"indexSwap":1,"taskCancelation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 3 of type `taskCancelation` cannot be batched", } +0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {"primaryKey":"sheep","matchedTasks":3,"canceledTasks":2,"originalFilter":"test_query","swaps":[{"indexes":["catto","doggo"]}]}, stats: {"totalNbTasks":3,"status":{"succeeded":1,"canceled":2},"types":{"indexCreation":1,"indexSwap":1,"taskCancelation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `taskCancelation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/processed_all_tasks.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/processed_all_tasks.snap index 56fed6a13..4c54de49a 100644 --- a/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/processed_all_tasks.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/processed_all_tasks.snap @@ -47,9 +47,9 @@ whalo: { number_of_documents: 0, field_distribution: {} } [timestamp] [2,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {"primaryKey":"plankton"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"whalo":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", } -2 {uid: 2, details: {"primaryKey":"his_own_vomit"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {"primaryKey":"bone"}, stats: 
{"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {"primaryKey":"plankton"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"whalo":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", } +2 {uid: 2, details: {"primaryKey":"his_own_vomit"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/after-advancing-a-bit.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/after-advancing-a-bit.snap index 7ef7b4905..7ce0d3ca3 100644 --- a/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/after-advancing-a-bit.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/after-advancing-a-bit.snap @@ -4,7 +4,7 @@ source: crates/index-scheduler/src/queue/batches_test.rs ### Autobatching Enabled = true ### Processing batch Some(1): [1,] -{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", } +{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} @@ -42,7 +42,7 @@ catto: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/end.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/end.snap index fef01fe95..603544991 100644 --- a/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/end.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/end.snap @@ -47,9 +47,9 @@ doggo: { number_of_documents: 0, 
field_distribution: {} } [timestamp] [2,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", } -2 {uid: 2, details: {"primaryKey":"fish"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"whalo":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", } +2 {uid: 2, details: {"primaryKey":"fish"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"whalo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/after-processing-everything.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/after-processing-everything.snap index 87c841ba9..84d6c7878 100644 --- a/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/after-processing-everything.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/after-processing-everything.snap @@ -52,10 +52,10 @@ doggo: { number_of_documents: 0, field_distribution: {} } [timestamp] [3,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", } -2 {uid: 2, details: {"swaps":[{"indexes":["catto","doggo"]}]}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "task with id 2 of type `indexSwap` cannot be batched", } -3 {uid: 3, details: {"swaps":[{"indexes":["catto","whalo"]}]}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "task with id 3 of type `indexSwap` cannot be batched", } +0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only 
task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", } +2 {uid: 2, details: {"swaps":[{"indexes":["catto","doggo"]}]}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "created batch containing only task with id 2 of type `indexSwap` that cannot be batched with any other task.", } +3 {uid: 3, details: {"swaps":[{"indexes":["catto","whalo"]}]}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "created batch containing only task with id 3 of type `indexSwap` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_canceled_by/start.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_canceled_by/start.snap index 292382fac..e3c26b2b3 100644 --- a/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_canceled_by/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_canceled_by/start.snap @@ -48,8 +48,8 @@ catto: { number_of_documents: 0, field_distribution: {} } [timestamp] [1,2,3,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {"primaryKey":"sheep","matchedTasks":3,"canceledTasks":2,"originalFilter":"test_query","swaps":[{"indexes":["catto","doggo"]}]}, stats: {"totalNbTasks":3,"status":{"succeeded":1,"canceled":2},"types":{"indexCreation":1,"indexSwap":1,"taskCancelation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 3 of type `taskCancelation` cannot be batched", } +0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {"primaryKey":"sheep","matchedTasks":3,"canceledTasks":2,"originalFilter":"test_query","swaps":[{"indexes":["catto","doggo"]}]}, stats: {"totalNbTasks":3,"status":{"succeeded":1,"canceled":2},"types":{"indexCreation":1,"indexSwap":1,"taskCancelation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `taskCancelation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/processed_all_tasks.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/processed_all_tasks.snap index 18358c998..4475c71fc 100644 --- a/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/processed_all_tasks.snap +++ 
b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/processed_all_tasks.snap @@ -47,9 +47,9 @@ whalo: { number_of_documents: 0, field_distribution: {} } [timestamp] [2,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {"primaryKey":"plankton"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"whalo":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", } -2 {uid: 2, details: {"primaryKey":"his_own_vomit"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {"primaryKey":"plankton"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"whalo":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", } +2 {uid: 2, details: {"primaryKey":"his_own_vomit"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/end.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/end.snap index dd31e587c..4ac6201a6 100644 --- a/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/end.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/end.snap @@ -47,9 +47,9 @@ doggo: { number_of_documents: 0, field_distribution: {} } [timestamp] [2,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", } -2 {uid: 2, details: {"primaryKey":"fish"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"whalo":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {"primaryKey":"sheep"}, stats: 
{"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", } +2 {uid: 2, details: {"primaryKey":"fish"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"whalo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/autobatcher.rs b/crates/index-scheduler/src/scheduler/autobatcher.rs index b57983291..b3f7d2743 100644 --- a/crates/index-scheduler/src/scheduler/autobatcher.rs +++ b/crates/index-scheduler/src/scheduler/autobatcher.rs @@ -71,6 +71,7 @@ impl From for AutobatchKind { KindWithContent::TaskCancelation { .. } | KindWithContent::TaskDeletion { .. } | KindWithContent::DumpCreation { .. } + | KindWithContent::Export { .. } | KindWithContent::UpgradeDatabase { .. } | KindWithContent::SnapshotCreation => { panic!("The autobatcher should never be called with tasks that don't apply to an index.") diff --git a/crates/index-scheduler/src/scheduler/create_batch.rs b/crates/index-scheduler/src/scheduler/create_batch.rs index e3763881b..e78ed2c2e 100644 --- a/crates/index-scheduler/src/scheduler/create_batch.rs +++ b/crates/index-scheduler/src/scheduler/create_batch.rs @@ -1,4 +1,5 @@ use std::fmt; +use std::io::ErrorKind; use meilisearch_types::heed::RoTxn; use meilisearch_types::milli::update::IndexDocumentsMethod; @@ -47,6 +48,9 @@ pub(crate) enum Batch { IndexSwap { task: Task, }, + Export { + task: Task, + }, UpgradeDatabase { tasks: Vec, }, @@ -103,6 +107,7 @@ impl Batch { Batch::TaskCancelation { task, .. } | Batch::Dump(task) | Batch::IndexCreation { task, .. } + | Batch::Export { task } | Batch::IndexUpdate { task, .. } => { RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() } @@ -142,6 +147,7 @@ impl Batch { | TaskDeletions(_) | SnapshotCreation(_) | Dump(_) + | Export { .. } | UpgradeDatabase { .. } | IndexSwap { .. } => None, IndexOperation { op, .. } => Some(op.index_uid()), @@ -167,6 +173,7 @@ impl fmt::Display for Batch { Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?, Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?, Batch::IndexSwap { .. } => f.write_str("IndexSwap")?, + Batch::Export { .. } => f.write_str("Export")?, Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?, }; match index_uid { @@ -426,9 +433,10 @@ impl IndexScheduler { /// 0. We get the *last* task to cancel. /// 1. We get the tasks to upgrade. /// 2. We get the *next* task to delete. - /// 3. We get the *next* snapshot to process. - /// 4. We get the *next* dump to process. - /// 5. We get the *next* tasks to process for a specific index. + /// 3. We get the *next* export to process. + /// 4. We get the *next* snapshot to process. + /// 5. We get the *next* dump to process. + /// 6. We get the *next* tasks to process for a specific index. #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")] pub(crate) fn create_next_batch( &self, @@ -500,7 +508,17 @@ impl IndexScheduler { return Ok(Some((Batch::TaskDeletions(tasks), current_batch))); } - // 3. we batch the snapshot. + // 3. we batch the export. + let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? 
& enqueued; + if !to_export.is_empty() { + let task_id = to_export.iter().next().expect("There must be at least one export task"); + let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap(); + current_batch.processing([&mut task]); + current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export }); + return Ok(Some((Batch::Export { task }, current_batch))); + } + + // 4. we batch the snapshot. let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued; if !to_snapshot.is_empty() { let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?; @@ -510,7 +528,7 @@ impl IndexScheduler { return Ok(Some((Batch::SnapshotCreation(tasks), current_batch))); } - // 4. we batch the dumps. + // 5. we batch the dumps. let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued; if let Some(to_dump) = to_dump.min() { let mut task = @@ -523,7 +541,7 @@ impl IndexScheduler { return Ok(Some((Batch::Dump(task), current_batch))); } - // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. + // 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) }; let mut task = self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; @@ -577,7 +595,11 @@ impl IndexScheduler { .and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?; if let Some(uuid) = task.content_uuid() { - let content_size = self.queue.file_store.compute_size(uuid)?; + let content_size = match self.queue.file_store.compute_size(uuid) { + Ok(content_size) => content_size, + Err(file_store::Error::IoError(err)) if err.kind() == ErrorKind::NotFound => 0, + Err(otherwise) => return Err(otherwise.into()), + }; total_size = total_size.saturating_add(content_size); } diff --git a/crates/index-scheduler/src/scheduler/mod.rs b/crates/index-scheduler/src/scheduler/mod.rs index f0e324a8d..5ac591143 100644 --- a/crates/index-scheduler/src/scheduler/mod.rs +++ b/crates/index-scheduler/src/scheduler/mod.rs @@ -4,6 +4,7 @@ mod autobatcher_test; mod create_batch; mod process_batch; mod process_dump_creation; +mod process_export; mod process_index_operation; mod process_snapshot_creation; mod process_upgrade; @@ -83,6 +84,9 @@ pub struct Scheduler { /// /// 0 disables the cache. pub(crate) embedding_cache_cap: usize, + + /// Snapshot compaction status. 
+ pub(crate) experimental_no_snapshot_compaction: bool, } impl Scheduler { @@ -98,6 +102,7 @@ impl Scheduler { auth_env: self.auth_env.clone(), version_file_path: self.version_file_path.clone(), embedding_cache_cap: self.embedding_cache_cap, + experimental_no_snapshot_compaction: self.experimental_no_snapshot_compaction, } } @@ -114,6 +119,7 @@ impl Scheduler { auth_env, version_file_path: options.version_file_path.clone(), embedding_cache_cap: options.embedding_cache_cap, + experimental_no_snapshot_compaction: options.experimental_no_snapshot_compaction, } } } @@ -370,9 +376,11 @@ impl IndexScheduler { post_commit_dabases_sizes .get(dbname) .map(|post_size| { - use byte_unit::{Byte, UnitType::Binary}; use std::cmp::Ordering::{Equal, Greater, Less}; + use byte_unit::Byte; + use byte_unit::UnitType::Binary; + let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary); let diff_size = post_size.abs_diff(*pre_size) as u64; let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary); diff --git a/crates/index-scheduler/src/scheduler/process_batch.rs b/crates/index-scheduler/src/scheduler/process_batch.rs index c349f90ad..c21ab27ad 100644 --- a/crates/index-scheduler/src/scheduler/process_batch.rs +++ b/crates/index-scheduler/src/scheduler/process_batch.rs @@ -162,8 +162,13 @@ impl IndexScheduler { .set_currently_updating_index(Some((index_uid.clone(), index.clone()))); let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?; - let (tasks, congestion) = - self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?; + let (tasks, congestion) = self.apply_index_operation( + &mut index_wtxn, + &index, + op, + &progress, + current_batch.embedder_stats.clone(), + )?; { progress.update_progress(FinalizingIndexStep::Committing); @@ -238,10 +243,12 @@ impl IndexScheduler { ); builder.set_primary_key(primary_key); let must_stop_processing = self.scheduler.must_stop_processing.clone(); + builder .execute( - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), + &|| must_stop_processing.get(), + &progress, + current_batch.embedder_stats.clone(), ) .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; index_wtxn.commit()?; @@ -361,6 +368,46 @@ impl IndexScheduler { task.status = Status::Succeeded; Ok((vec![task], ProcessBatchInfo::default())) } + Batch::Export { mut task } => { + let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind + else { + unreachable!() + }; + + let ret = catch_unwind(AssertUnwindSafe(|| { + self.process_export( + url, + api_key.as_deref(), + payload_size.as_ref(), + indexes, + progress, + ) + })); + + let stats = match ret { + Ok(Ok(stats)) => stats, + Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask), + Ok(Err(e)) => return Err(Error::Export(Box::new(e))), + Err(e) => { + let msg = match e.downcast_ref::<&'static str>() { + Some(s) => *s, + None => match e.downcast_ref::<String>() { + Some(s) => &s[..], + None => "Box<dyn Any>", + }, + }; + return Err(Error::Export(Box::new(Error::ProcessBatchPanicked( + msg.to_string(), + )))); + } + }; + + task.status = Status::Succeeded; + if let Some(Details::Export { indexes, ..
}) = task.details.as_mut() { + *indexes = stats; + } + Ok((vec![task], ProcessBatchInfo::default())) + } Batch::UpgradeDatabase { mut tasks } => { let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else { unreachable!(); @@ -708,9 +755,11 @@ impl IndexScheduler { from.1, from.2 ); - match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let ret = catch_unwind(std::panic::AssertUnwindSafe(|| { self.process_rollback(from, progress) - })) { + })); + + match ret { Ok(Ok(())) => {} Ok(Err(err)) => return Err(Error::DatabaseUpgrade(Box::new(err))), Err(e) => { diff --git a/crates/index-scheduler/src/scheduler/process_dump_creation.rs b/crates/index-scheduler/src/scheduler/process_dump_creation.rs index a6d785b2f..b8d100415 100644 --- a/crates/index-scheduler/src/scheduler/process_dump_creation.rs +++ b/crates/index-scheduler/src/scheduler/process_dump_creation.rs @@ -43,7 +43,16 @@ impl IndexScheduler { let rtxn = self.env.read_txn()?; - // 2. dump the tasks + // 2. dump the chat completion settings + // TODO should I skip the export if the chat completion has been disabled? + progress.update_progress(DumpCreationProgress::DumpTheChatCompletionSettings); + let mut dump_chat_completion_settings = dump.create_chat_completions_settings()?; + for result in self.chat_settings.iter(&rtxn)? { + let (name, chat_settings) = result?; + dump_chat_completion_settings.push_settings(name, &chat_settings)?; + } + + // 3. dump the tasks progress.update_progress(DumpCreationProgress::DumpTheTasks); let mut dump_tasks = dump.create_tasks_queue()?; @@ -81,7 +90,7 @@ impl IndexScheduler { let mut dump_content_file = dump_tasks.push_task(&t.into())?; - // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. + // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. if let Some(content_file) = content_file { if self.scheduler.must_stop_processing.get() { return Err(Error::AbortedTask); @@ -105,7 +114,7 @@ impl IndexScheduler { } dump_tasks.flush()?; - // 3. dump the batches + // 4. dump the batches progress.update_progress(DumpCreationProgress::DumpTheBatches); let mut dump_batches = dump.create_batches_queue()?; @@ -138,7 +147,7 @@ impl IndexScheduler { } dump_batches.flush()?; - // 4. Dump the indexes + // 5. Dump the indexes progress.update_progress(DumpCreationProgress::DumpTheIndexes); let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32; let mut count = 0; @@ -165,9 +174,6 @@ impl IndexScheduler { let fields_ids_map = index.fields_ids_map(&rtxn)?; let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - let embedding_configs = index - .embedding_configs(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; let nb_documents = index .number_of_documents(&rtxn) @@ -178,7 +184,7 @@ impl IndexScheduler { let documents = index .all_documents(&rtxn) .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - // 4.1. Dump the documents + // 5.1. 
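The `Batch::Export` arm above wraps `process_export` in `catch_unwind`, so a panic inside the export fails only that task instead of crashing the scheduler. A self-contained sketch of the same payload-downcasting chain (the helper name is made up for illustration):

```rust
use std::panic::{catch_unwind, AssertUnwindSafe};

/// Illustrative helper: run a closure and surface a panic as a message,
/// using the same downcast chain as the `Batch::Export` arm above.
fn panic_message(run: impl FnOnce()) -> Option<String> {
    match catch_unwind(AssertUnwindSafe(run)) {
        Ok(()) => None,
        Err(payload) => {
            let msg = match payload.downcast_ref::<&'static str>() {
                Some(s) => *s,
                None => match payload.downcast_ref::<String>() {
                    Some(s) => &s[..],
                    // Opaque payloads keep a generic description.
                    None => "Box<dyn Any>",
                },
            };
            Some(msg.to_string())
        }
    }
}

fn main() {
    // The default panic hook still prints to stderr; only the control flow
    // and the recovered message matter here.
    assert_eq!(panic_message(|| panic!("boom")), Some("boom".into()));
    assert_eq!(panic_message(|| ()), None);
}
```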
Dump the documents for ret in documents { if self.scheduler.must_stop_processing.get() { return Err(Error::AbortedTask); } @@ -221,16 +227,12 @@ impl IndexScheduler { return Err(Error::from_milli(user_err, Some(uid.to_string()))); }; - for (embedder_name, embeddings) in embeddings { - let user_provided = embedding_configs - .iter() - .find(|conf| conf.name == embedder_name) - .is_some_and(|conf| conf.user_provided.contains(id)); + for (embedder_name, (embeddings, regenerate)) in embeddings { let embeddings = ExplicitVectors { embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( embeddings, )), - regenerate: !user_provided, + regenerate, }; vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap()); } @@ -240,7 +242,7 @@ impl IndexScheduler { atomic.fetch_add(1, Ordering::Relaxed); } - // 4.2. Dump the settings + // 5.2. Dump the settings let settings = meilisearch_types::settings::settings( index, &rtxn, @@ -251,7 +253,7 @@ impl IndexScheduler { Ok(()) })?; - // 5. Dump experimental feature settings + // 6. Dump experimental feature settings progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); let features = self.features().runtime_features(); dump.create_experimental_features(features)?; diff --git a/crates/index-scheduler/src/scheduler/process_export.rs b/crates/index-scheduler/src/scheduler/process_export.rs new file mode 100644 index 000000000..2062e1c28 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/process_export.rs @@ -0,0 +1,365 @@ +use std::collections::BTreeMap; +use std::io::{self, Write as _}; +use std::sync::atomic; +use std::time::Duration; + +use backoff::ExponentialBackoff; +use byte_unit::Byte; +use flate2::write::GzEncoder; +use flate2::Compression; +use meilisearch_types::index_uid_pattern::IndexUidPattern; +use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; +use meilisearch_types::milli::progress::{Progress, VariableNameStep}; +use meilisearch_types::milli::update::{request_threads, Setting}; +use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; +use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError}; +use meilisearch_types::settings::{self, SecretPolicy}; +use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings}; +use serde::Deserialize; +use ureq::{json, Response}; + +use super::MustStopProcessing; +use crate::processing::AtomicDocumentStep; +use crate::{Error, IndexScheduler, Result}; + +impl IndexScheduler { + pub(super) fn process_export( + &self, + base_url: &str, + api_key: Option<&str>, + payload_size: Option<&Byte>, + indexes: &BTreeMap<IndexUidPattern, ExportIndexSettings>, + progress: Progress, + ) -> Result<BTreeMap<IndexUidPattern, DetailsExportIndexSettings>> { + #[cfg(test)] + self.maybe_fail(crate::test_utils::FailureLocation::ProcessExport)?; +
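In the dump hunk above, each embedder now yields its embeddings together with a stored `regenerate` flag, replacing the old lookup in a `user_provided` set. A toy sketch of building the `_vectors` field that way, using `serde_json` with simplified shapes (not milli's real `ExplicitVectors` type):

```rust
use serde_json::{json, Map, Value};

/// Toy shapes: each entry is (embedder name, its embeddings, regenerate flag),
/// standing in for milli's `(embeddings, regenerate)` pairs.
fn inject_vectors(doc: &mut Map<String, Value>, embeddings: Vec<(String, Vec<Vec<f32>>, bool)>) {
    let vectors = doc
        .entry("_vectors".to_string())
        .or_insert_with(|| Value::Object(Map::new()));
    if let Value::Object(vectors) = vectors {
        for (embedder_name, embeddings, regenerate) in embeddings {
            // The flag comes stored with the embeddings; no `user_provided` lookup.
            vectors.insert(
                embedder_name,
                json!({ "embeddings": embeddings, "regenerate": regenerate }),
            );
        }
    }
}

fn main() {
    let mut doc = Map::new();
    doc.insert("doggo".into(), json!("kefir"));
    inject_vectors(&mut doc, vec![("noise".into(), vec![vec![0.1, 0.2, 0.3]], false)]);
    println!("{}", Value::Object(doc));
}
```

The selection chain that continues below pairs every existing index name with the first `IndexUidPattern` that matches it. A minimal sketch of that filtering, assuming simple trailing-`*` wildcard semantics for patterns (illustrative only; `IndexUidPattern::matches_str` is the real implementation):

```rust
/// Toy wildcard semantics assumed for illustration: a trailing `*` matches a
/// prefix, anything else must match exactly.
fn matches_str(pattern: &str, uid: &str) -> bool {
    match pattern.strip_suffix('*') {
        Some(prefix) => uid.starts_with(prefix),
        None => pattern == uid,
    }
}

fn main() {
    let patterns = ["movies", "doggo-*"];
    let index_names = ["movies", "doggo-breeds", "cattos"];
    // Keep only the indexes that some export pattern selects, much like the
    // `flat_map`/`find` chain below does while attaching each pattern's settings.
    let selected: Vec<&str> = index_names
        .into_iter()
        .filter(|uid| patterns.iter().any(|p| matches_str(p, uid)))
        .collect();
    assert_eq!(selected, ["movies", "doggo-breeds"]);
}
```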
+ let indexes: Vec<_> = self + .index_names()? + .into_iter() + .flat_map(|uid| { + indexes + .iter() + .find(|(pattern, _)| pattern.matches_str(&uid)) + .map(|(pattern, settings)| (pattern, uid, settings)) + }) + .collect(); + + let mut output = BTreeMap::new(); + let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build(); + let must_stop_processing = self.scheduler.must_stop_processing.clone(); + for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() { + if must_stop_processing.get() { + return Err(Error::AbortedTask); + } + + progress.update_progress(VariableNameStep::<ExportIndex>::new( + format!("Exporting index `{uid}`"), + i as u32, + indexes.len() as u32, + )); + + let ExportIndexSettings { filter, override_settings } = export_settings; + let index = self.index(uid)?; + let index_rtxn = index.read_txn()?; + + // First, check if the index already exists + let url = format!("{base_url}/indexes/{uid}"); + let response = retry(&must_stop_processing, || { + let mut request = agent.get(&url); + if let Some(api_key) = api_key { + request = request.set("Authorization", &format!("Bearer {api_key}")); + } + + request.send_bytes(Default::default()).map_err(into_backoff_error) + }); + let index_exists = match response { + Ok(response) => response.status() == 200, + Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => { + false + } + Err(e) => return Err(e), + }; + + let primary_key = index + .primary_key(&index_rtxn) + .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; + + // Create the index + if !index_exists { + let url = format!("{base_url}/indexes"); + retry(&must_stop_processing, || { + let mut request = agent.post(&url); + if let Some(api_key) = api_key { + request = request.set("Authorization", &format!("Bearer {api_key}")); + } + let index_param = json!({ "uid": uid, "primaryKey": primary_key }); + request.send_json(&index_param).map_err(into_backoff_error) + })?; + } + + // Patch the index primary key + if index_exists && *override_settings { + let url = format!("{base_url}/indexes/{uid}"); + retry(&must_stop_processing, || { + let mut request = agent.patch(&url); + if let Some(api_key) = api_key { + request = request.set("Authorization", &format!("Bearer {api_key}")); + } + let index_param = json!({ "primaryKey": primary_key }); + request.send_json(&index_param).map_err(into_backoff_error) + })?; + } + + // Send the index settings + if !index_exists || *override_settings { + let mut settings = + settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + // Remove the experimental chat setting if not enabled + if self.features().check_chat_completions("exporting chat settings").is_err() { + settings.chat = Setting::NotSet; + } + // Retry logic for sending settings + let url = format!("{base_url}/indexes/{uid}/settings"); + let bearer = api_key.map(|api_key| format!("Bearer {api_key}")); + retry(&must_stop_processing, || { + let mut request = agent.patch(&url); + if let Some(bearer) = bearer.as_ref() { + request = request.set("Authorization", bearer); + } + request.send_json(settings.clone()).map_err(into_backoff_error) + })?; + } + + let filter = filter + .as_ref() + .map(Filter::from_json) + .transpose() + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
+ .flatten(); + + let filter_universe = filter + .map(|f| f.evaluate(&index_rtxn, &index)) + .transpose() + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + let whole_universe = index + .documents_ids(&index_rtxn) + .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; + let universe = filter_universe.unwrap_or(whole_universe); + + let fields_ids_map = index.fields_ids_map(&index_rtxn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + + // We don't need to keep this one alive as we will + // spawn many threads to process the documents + drop(index_rtxn); + + let total_documents = universe.len() as u32; + let (step, progress_step) = AtomicDocumentStep::new(total_documents); + progress.update_progress(progress_step); + + output.insert( + IndexUidPattern::new_unchecked(uid.clone()), + DetailsExportIndexSettings { + settings: (*export_settings).clone(), + matched_documents: Some(total_documents as u64), + }, + ); + + let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(50 * 1024 * 1024); // defaults to 50 MiB + let documents_url = format!("{base_url}/indexes/{uid}/documents"); + + request_threads() + .broadcast(|ctx| { + let index_rtxn = index + .read_txn() + .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; + + let mut buffer = Vec::new(); + let mut tmp_buffer = Vec::new(); + let mut compressed_buffer = Vec::new(); + for (i, docid) in universe.iter().enumerate() { + if i % ctx.num_threads() != ctx.index() { + continue; + } + + let document = index + .document(&index_rtxn, docid) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + + let mut document = obkv_to_json(&all_fields, &fields_ids_map, document) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + + // TODO definitely factorize this code + 'inject_vectors: { + let embeddings = index + .embeddings(&index_rtxn, docid) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + + if embeddings.is_empty() { + break 'inject_vectors; + } + + let vectors = document + .entry(RESERVED_VECTORS_FIELD_NAME) + .or_insert(serde_json::Value::Object(Default::default())); + + let serde_json::Value::Object(vectors) = vectors else { + return Err(Error::from_milli( + milli::Error::UserError( + milli::UserError::InvalidVectorsMapType { + document_id: { + if let Ok(Some(Ok(index))) = index + .external_id_of( + &index_rtxn, + std::iter::once(docid), + ) + .map(|it| it.into_iter().next()) + { + index + } else { + format!("internal docid={docid}") + } + }, + value: vectors.clone(), + }, + ), + Some(uid.to_string()), + )); + }; + + for (embedder_name, (embeddings, regenerate)) in embeddings { + let embeddings = ExplicitVectors { + embeddings: Some( + VectorOrArrayOfVectors::from_array_of_vectors(embeddings), + ), + regenerate, + }; + vectors.insert( + embedder_name, + serde_json::to_value(embeddings).unwrap(), + ); + } + } + + tmp_buffer.clear(); + serde_json::to_writer(&mut tmp_buffer, &document) + .map_err(milli::InternalError::from) + .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; + + // Make sure we put at least one document in the buffer even + // though we might go above the buffer limit before sending + if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit { + // We compress the documents before sending them + let mut encoder = + GzEncoder::new(&mut compressed_buffer, Compression::default()); + encoder + .write_all(&buffer) + .map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?; + encoder + .finish() + 
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?, + + retry(&must_stop_processing, || { + let mut request = agent.post(&documents_url); + request = request.set("Content-Type", "application/x-ndjson"); + request = request.set("Content-Encoding", "gzip"); + if let Some(api_key) = api_key { + request = request + .set("Authorization", &(format!("Bearer {api_key}"))); + } + request.send_bytes(&compressed_buffer).map_err(into_backoff_error) + })?; + buffer.clear(); + compressed_buffer.clear(); + } + buffer.extend_from_slice(&tmp_buffer); + + if i % 100 == 0 { + step.fetch_add(100, atomic::Ordering::Relaxed); + } + } + + retry(&must_stop_processing, || { + let mut request = agent.post(&documents_url); + request = request.set("Content-Type", "application/x-ndjson"); + if let Some(api_key) = api_key { + request = request.set("Authorization", &(format!("Bearer {api_key}"))); + } + request.send_bytes(&buffer).map_err(into_backoff_error) + })?; + + Ok(()) + }) + .map_err(|e| { + Error::from_milli( + milli::Error::InternalError(InternalError::PanicInThreadPool(e)), + Some(uid.to_string()), + ) + })?; + + step.store(total_documents, atomic::Ordering::Relaxed); + } + + Ok(output) + } +} + +fn retry<F>(must_stop_processing: &MustStopProcessing, send_request: F) -> Result<Response> +where + F: Fn() -> Result<Response, backoff::Error<ureq::Error>>, +{ + match backoff::retry(ExponentialBackoff::default(), || { + if must_stop_processing.get() { + return Err(backoff::Error::Permanent(ureq::Error::Status( + u16::MAX, + // 444: Connection Closed Without Response + Response::new(444, "Abort", "Aborted task").unwrap(), + ))); + } + send_request() + }) { + Ok(response) => Ok(response), + Err(backoff::Error::Permanent(e)) => Err(ureq_error_into_error(e)), + Err(backoff::Error::Transient { err, retry_after: _ }) => Err(ureq_error_into_error(err)), + } +} + +fn into_backoff_error(err: ureq::Error) -> backoff::Error<ureq::Error> { + match err { + // These status codes must trigger an automatic retry + // + ureq::Error::Status(408 | 429 | 500 | 502 | 503 | 504, _) => { + backoff::Error::Transient { err, retry_after: None } + } + ureq::Error::Status(_, _) => backoff::Error::Permanent(err), + ureq::Error::Transport(_) => backoff::Error::Transient { err, retry_after: None }, + } +} + +/// Converts a `ureq::Error` into an `Error`. +fn ureq_error_into_error(error: ureq::Error) -> Error { + #[derive(Deserialize)] + struct MeiliError { + message: String, + code: String, + r#type: String, + link: String, + } + + match error { + // This is a workaround to handle task abortion - the error propagation path + // makes it difficult to cleanly surface the abortion at this level.
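The upload loop above accumulates NDJSON documents until the payload limit would be exceeded, gzips the buffer, and posts it through `retry`, which classifies errors as transient (408/429/5xx and transport errors) or permanent. A self-contained sketch of that batching-plus-backoff shape, with a toy failure counter standing in for real HTTP errors (the `send` helper is hypothetical):

```rust
use std::io::Write as _;

use backoff::{Error as BackoffError, ExponentialBackoff};
use flate2::write::GzEncoder;
use flate2::Compression;

/// Stand-in for the HTTP POST: fails transiently `failures` times, then
/// succeeds. Real code maps ureq 408/429/5xx and transport errors to
/// `backoff::Error::Transient` and everything else to `Permanent`.
fn send(payload: &[u8], failures: &mut u32) -> Result<(), BackoffError<String>> {
    backoff::retry(ExponentialBackoff::default(), || {
        if *failures > 0 {
            *failures -= 1;
            // Pretend the remote answered 503: worth retrying.
            return Err(BackoffError::transient("503 Service Unavailable".into()));
        }
        println!("sent {} bytes", payload.len());
        Ok(())
    })
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let limit = 32; // tiny for the demo; the diff defaults to 50 MiB
    let docs = [r#"{"id":1}"#, r#"{"id":2}"#, r#"{"id":3}"#, r#"{"id":4}"#];

    let mut buffer = Vec::new();
    for doc in docs {
        let line = format!("{doc}\n");
        // Flush before overflowing the limit, but never send an empty batch,
        // so every batch carries at least one document.
        if !buffer.is_empty() && buffer.len() + line.len() > limit {
            let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
            encoder.write_all(&buffer)?;
            let compressed = encoder.finish()?;
            send(&compressed, &mut 2).map_err(|e| format!("{e:?}"))?;
            buffer.clear();
        }
        buffer.extend_from_slice(line.as_bytes());
    }
    // The trailing remainder is posted as-is, like the final `retry` above.
    send(&buffer, &mut 0).map_err(|e| format!("{e:?}"))?;
    Ok(())
}
```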
+ ureq::Error::Status(u16::MAX, _) => Error::AbortedTask, + ureq::Error::Status(_, response) => match response.into_json() { + Ok(MeiliError { message, code, r#type, link }) => { + Error::FromRemoteWhenExporting { message, code, r#type, link } + } + Err(e) => e.into(), + }, + ureq::Error::Transport(transport) => io::Error::new(io::ErrorKind::Other, transport).into(), + } +} + +enum ExportIndex {} diff --git a/crates/index-scheduler/src/scheduler/process_index_operation.rs b/crates/index-scheduler/src/scheduler/process_index_operation.rs index 093c6209d..62d0e6545 100644 --- a/crates/index-scheduler/src/scheduler/process_index_operation.rs +++ b/crates/index-scheduler/src/scheduler/process_index_operation.rs @@ -1,8 +1,10 @@ +use std::sync::Arc; + use bumpalo::collections::CollectIn; use bumpalo::Bump; use meilisearch_types::heed::RwTxn; use meilisearch_types::milli::documents::PrimaryKey; -use meilisearch_types::milli::progress::Progress; +use meilisearch_types::milli::progress::{EmbedderStats, Progress}; use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; use meilisearch_types::milli::update::DocumentAdditionResult; use meilisearch_types::milli::{self, ChannelCongestion, Filter}; @@ -24,7 +26,7 @@ impl IndexScheduler { /// The list of processed tasks. #[tracing::instrument( level = "trace", - skip(self, index_wtxn, index, progress), + skip(self, index_wtxn, index, progress, embedder_stats), target = "indexing::scheduler" )] pub(crate) fn apply_index_operation<'i>( @@ -33,6 +35,7 @@ index: &'i Index, operation: IndexOperation, progress: &Progress, + embedder_stats: Arc<EmbedderStats>, ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> { let indexer_alloc = Bump::new(); let started_processing_at = std::time::Instant::now(); @@ -86,8 +89,9 @@ impl IndexScheduler { let mut content_files_iter = content_files.iter(); let mut indexer = indexer::DocumentOperation::new(); let embedders = index + .embedding_configs() .embedding_configs(index_wtxn) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + .map_err(|e| Error::from_milli(e.into(), Some(index_uid.clone())))?; let embedders = self.embedders(index_uid.clone(), embedders)?; for operation in operations { match operation { @@ -177,6 +181,7 @@ embedders, &|| must_stop_processing.get(), progress, + &embedder_stats, ) .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?, ); @@ -270,8 +275,9 @@ }) .unwrap()?; let embedders = index + .embedding_configs() .embedding_configs(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; let embedders = self.embedders(index_uid.clone(), embedders)?; progress.update_progress(DocumentEditionProgress::Indexing); @@ -288,6 +294,7 @@ embedders, &|| must_stop_processing.get(), progress, + &embedder_stats, ) .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, ); @@ -418,8 +425,9 @@ indexer.delete_documents_by_docids(to_delete); let document_changes = indexer.into_changes(&indexer_alloc, primary_key); let embedders = index + .embedding_configs() .embedding_configs(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; let embedders = self.embedders(index_uid.clone(), embedders)?; progress.update_progress(DocumentDeletionProgress::Indexing); @@ -436,6 +444,7 @@ embedders,
&|| must_stop_processing.get(), progress, + &embedder_stats, ) .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, ); @@ -468,14 +477,11 @@ impl IndexScheduler { } progress.update_progress(SettingsProgress::ApplyTheSettings); - builder - .execute( - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - ) + let congestion = builder + .execute(&|| must_stop_processing.get(), progress, embedder_stats) .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - Ok((tasks, None)) + Ok((tasks, congestion)) } IndexOperation::DocumentClearAndSetting { index_uid, @@ -491,6 +497,7 @@ impl IndexScheduler { tasks: cleared_tasks, }, progress, + embedder_stats.clone(), )?; let (settings_tasks, _congestion) = self.apply_index_operation( @@ -498,6 +505,7 @@ impl IndexScheduler { index, IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, progress, + embedder_stats, )?; let mut tasks = settings_tasks; diff --git a/crates/index-scheduler/src/scheduler/process_snapshot_creation.rs b/crates/index-scheduler/src/scheduler/process_snapshot_creation.rs index 599991a7d..d58157ae3 100644 --- a/crates/index-scheduler/src/scheduler/process_snapshot_creation.rs +++ b/crates/index-scheduler/src/scheduler/process_snapshot_creation.rs @@ -41,7 +41,12 @@ impl IndexScheduler { progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); let dst = temp_snapshot_dir.path().join("tasks"); fs::create_dir_all(&dst)?; - self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)?; + let compaction_option = if self.scheduler.experimental_no_snapshot_compaction { + CompactionOption::Disabled + } else { + CompactionOption::Enabled + }; + self.env.copy_to_path(dst.join("data.mdb"), compaction_option)?; // 2.2 Create a read transaction on the index-scheduler let rtxn = self.env.read_txn()?; @@ -80,7 +85,7 @@ impl IndexScheduler { let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); fs::create_dir_all(&dst)?; index - .copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled) + .copy_to_path(dst.join("data.mdb"), compaction_option) .map_err(|e| Error::from_milli(e, Some(name.to_string())))?; } @@ -90,7 +95,7 @@ impl IndexScheduler { progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); let dst = temp_snapshot_dir.path().join("auth"); fs::create_dir_all(&dst)?; - self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)?; + self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), compaction_option)?; // 5. 
Copy and tarball the flat snapshot progress.update_progress(SnapshotCreationProgress::CreateTheTarball); diff --git a/crates/index-scheduler/src/scheduler/process_upgrade/mod.rs b/crates/index-scheduler/src/scheduler/process_upgrade.rs similarity index 100% rename from crates/index-scheduler/src/scheduler/process_upgrade/mod.rs rename to crates/index-scheduler/src/scheduler/process_upgrade.rs diff --git a/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-7.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-7.snap new file mode 100644 index 000000000..82134b838 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-7.snap @@ -0,0 +1,17 @@ +--- +source: crates/index-scheduler/src/scheduler/test.rs +expression: config.embedder_options +--- +{ + "Rest": { + "api_key": "My super secret", + "distribution": null, + "dimensions": 4, + "url": "http://localhost:7777", + "request": "{{text}}", + "search_fragments": {}, + "indexing_fragments": {}, + "response": "{{embedding}}", + "headers": {} + } +} diff --git a/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-14.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-14.snap new file mode 100644 index 000000000..19b5cab92 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-14.snap @@ -0,0 +1,12 @@ +--- +source: crates/index-scheduler/src/scheduler/test_embedders.rs +expression: simple_hf_config.embedder_options +--- +{ + "HuggingFace": { + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "distribution": null, + "pooling": "useModel" + } +} diff --git a/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-27.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-27.snap new file mode 100644 index 000000000..0fc8bd531 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-27.snap @@ -0,0 +1,15 @@ +--- +source: crates/index-scheduler/src/scheduler/test_embedders.rs +expression: doc +--- +{ + "doggo": "Intel", + "breed": "beagle", + "_vectors": { + "noise": [ + 0.1, + 0.2, + 0.3 + ] + } +} diff --git a/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-40.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-40.snap new file mode 100644 index 000000000..0942e4d82 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-40.snap @@ -0,0 +1,15 @@ +--- +source: crates/index-scheduler/src/scheduler/test_embedders.rs +expression: doc +--- +{ + "doggo": "kefir", + "breed": "patou", + "_vectors": { + "noise": [ + 0.1, + 0.2, + 0.3 + ] + } +} diff --git a/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-8.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-8.snap index 19b5cab92..29f35d9c1 100644 --- 
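The snapshot-creation hunks above flip the LMDB copies from never-compacted to compacted-by-default, with the new `experimental_no_snapshot_compaction` flag opting out. A minimal sketch of just the selection logic (a local enum standing in for heed's `CompactionOption`):

```rust
/// Local stand-in for heed's CompactionOption; only the selection matters.
#[derive(Debug, PartialEq)]
enum CompactionOption {
    Enabled,
    Disabled,
}

fn compaction_option(experimental_no_snapshot_compaction: bool) -> CompactionOption {
    if experimental_no_snapshot_compaction {
        CompactionOption::Disabled
    } else {
        CompactionOption::Enabled
    }
}

fn main() {
    // Snapshots are now compacted by default; the experimental flag opts out.
    assert_eq!(compaction_option(false), CompactionOption::Enabled);
    assert_eq!(compaction_option(true), CompactionOption::Disabled);
}
```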
a/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-8.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-8.snap @@ -1,12 +1,17 @@ --- source: crates/index-scheduler/src/scheduler/test_embedders.rs -expression: simple_hf_config.embedder_options +expression: fakerest_config.embedder_options --- { - "HuggingFace": { - "model": "sentence-transformers/all-MiniLM-L6-v2", - "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "Rest": { + "api_key": "My super secret", "distribution": null, - "pooling": "useModel" + "dimensions": 384, + "url": "http://localhost:7777", + "request": "{{text}}", + "search_fragments": {}, + "indexing_fragments": {}, + "response": "{{embedding}}", + "headers": {} } } diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/cancel_processed.snap index 3a31c50c9..168b01b29 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/cancel_processed.snap @@ -39,7 +39,7 @@ catto [0,] [timestamp] [0,1,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0,"matchedTasks":1,"canceledTasks":1,"originalFilter":"test_query"}, stats: {"totalNbTasks":2,"status":{"succeeded":1,"canceled":1},"types":{"documentAdditionOrUpdate":1,"taskCancelation":1},"indexUids":{"catto":1}}, stop reason: "task with id 1 of type `taskCancelation` cannot be batched", } +0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0,"matchedTasks":1,"canceledTasks":1,"originalFilter":"test_query"}, stats: {"totalNbTasks":2,"status":{"succeeded":1,"canceled":1},"types":{"documentAdditionOrUpdate":1,"taskCancelation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `taskCancelation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,1,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/cancel_processed.snap index fa6d17476..4d41a9807 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/cancel_processed.snap @@ -50,7 +50,7 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} } ---------------------------------------------------------------------- ### All Batches: 0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, stop reason: "batched all enqueued tasks for index `catto`", } -1 {uid: 1, details: {"receivedDocuments":2,"indexedDocuments":0,"matchedTasks":3,"canceledTasks":2,"originalFilter":"test_query"}, stats: {"totalNbTasks":3,"status":{"succeeded":1,"canceled":2},"types":{"documentAdditionOrUpdate":2,"taskCancelation":1},"indexUids":{"beavero":1,"wolfo":1}}, stop reason: "task with id 3 of type `taskCancelation` cannot be batched", } +1 {uid: 1, details: 
{"receivedDocuments":2,"indexedDocuments":0,"matchedTasks":3,"canceledTasks":2,"originalFilter":"test_query"}, stats: {"totalNbTasks":3,"status":{"succeeded":1,"canceled":2},"types":{"documentAdditionOrUpdate":2,"taskCancelation":1},"indexUids":{"beavero":1,"wolfo":1}}, stop reason: "created batch containing only task with id 3 of type `taskCancelation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_processed.snap index 9ee3f9816..cbf0b6114 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_processed.snap @@ -38,7 +38,7 @@ canceled [0,] [timestamp] [0,1,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"matchedTasks":1,"canceledTasks":1,"originalFilter":"cancel dump"}, stats: {"totalNbTasks":2,"status":{"succeeded":1,"canceled":1},"types":{"taskCancelation":1,"dumpCreation":1},"indexUids":{}}, stop reason: "task with id 1 of type `taskCancelation` cannot be batched", } +0 {uid: 0, details: {"matchedTasks":1,"canceledTasks":1,"originalFilter":"cancel dump"}, stats: {"totalNbTasks":2,"status":{"succeeded":1,"canceled":1},"types":{"taskCancelation":1,"dumpCreation":1},"indexUids":{}}, stop reason: "created batch containing only task with id 1 of type `taskCancelation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,1,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_registered.snap index 9ca235e15..94efa13ab 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_registered.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_registered.snap @@ -4,7 +4,7 @@ source: crates/index-scheduler/src/scheduler/test.rs ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"dumpCreation":1},"indexUids":{}}, stop reason: "task with id 0 of type `dumpCreation` cannot be batched", } +{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"dumpCreation":1},"indexUids":{}}, stop reason: "created batch containing only task with id 0 of type `dumpCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }} diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_processed.snap index 1111edd98..362cd018e 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_processed.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_processed.snap @@ -40,7 +40,7 @@ catto: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,1,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0,"matchedTasks":1,"canceledTasks":1,"originalFilter":"test_query"}, stats: {"totalNbTasks":2,"status":{"succeeded":1,"canceled":1},"types":{"documentAdditionOrUpdate":1,"taskCancelation":1},"indexUids":{"catto":1}}, stop reason: "task with id 1 of type `taskCancelation` cannot be batched", } +0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0,"matchedTasks":1,"canceledTasks":1,"originalFilter":"test_query"}, stats: {"totalNbTasks":2,"status":{"succeeded":1,"canceled":1},"types":{"documentAdditionOrUpdate":1,"taskCancelation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `taskCancelation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,1,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/cancel_processed.snap index d4dc3452f..91291fe62 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/cancel_processed.snap @@ -41,7 +41,7 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} } ---------------------------------------------------------------------- ### All Batches: 0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, stop reason: "batched all enqueued tasks", } -1 {uid: 1, details: {"matchedTasks":1,"canceledTasks":0,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"taskCancelation":1},"indexUids":{}}, stop reason: "task with id 1 of type `taskCancelation` cannot be batched", } +1 {uid: 1, details: {"matchedTasks":1,"canceledTasks":0,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"taskCancelation":1},"indexUids":{}}, stop reason: "created batch containing only task with id 1 of type `taskCancelation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap index ad3fb246f..ed6e75a3d 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap @@ -60,9 +60,9 @@ girafos: { number_of_documents: 0, field_distribution: {} } [timestamp] [5,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, 
stop reason: "task with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"cattos":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", } -2 {uid: 2, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"girafos":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"cattos":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", } +2 {uid: 2, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"girafos":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", } 3 {uid: 3, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks for index `doggos`", } 4 {uid: 4, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"cattos":1}}, stop reason: "batched all enqueued tasks for index `cattos`", } 5 {uid: 5, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"girafos":1}}, stop reason: "batched all enqueued tasks", } diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/before_index_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/before_index_creation.snap index 8a8d58c99..f98e5d308 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/before_index_creation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/before_index_creation.snap @@ -41,7 +41,7 @@ doggos: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/both_task_succeeded.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/both_task_succeeded.snap index 6f13e4492..ae1139c0c 100644 --- 
a/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/both_task_succeeded.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/both_task_succeeded.snap @@ -42,8 +42,8 @@ doggos [0,1,2,] [timestamp] [1,2,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0,"deletedDocuments":0}, stats: {"totalNbTasks":2,"status":{"succeeded":2},"types":{"documentAdditionOrUpdate":1,"indexDeletion":1},"indexUids":{"doggos":2}}, stop reason: "task with id 2 deletes the index", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0,"deletedDocuments":0}, stats: {"totalNbTasks":2,"status":{"succeeded":2},"types":{"documentAdditionOrUpdate":1,"indexDeletion":1},"indexUids":{"doggos":2}}, stop reason: "stopped after task with id 2 because it deletes the index", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap index 93dbc831e..a35727f22 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap @@ -37,7 +37,7 @@ doggos [0,1,] [timestamp] [0,1,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0,"deletedDocuments":0}, stats: {"totalNbTasks":2,"status":{"succeeded":2},"types":{"documentAdditionOrUpdate":1,"indexDeletion":1},"indexUids":{"doggos":2}}, stop reason: "task with id 1 deletes the index", } +0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0,"deletedDocuments":0}, stats: {"totalNbTasks":2,"status":{"succeeded":2},"types":{"documentAdditionOrUpdate":1,"indexDeletion":1},"indexUids":{"doggos":2}}, stop reason: "stopped after task with id 1 because it deletes the index", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,1,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap index e9b3e0285..17b69061a 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap @@ -4,7 +4,7 @@ source: crates/index-scheduler/src/scheduler/test.rs ### Autobatching Enabled = true ### 
Processing batch Some(0):
[0,]
-{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
+{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap
index 24e885c46..c8a407554 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap
@@ -4,7 +4,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
-{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
+{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap
index 6ef7b5a38..0cae69a70 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap
@@ -4,7 +4,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
-{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
+{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap
index ae64e6fbd..c5e3e66c8 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap
@@ -41,7 +41,7 @@ doggos: { number_of_documents: 0, field_distribution: {} }
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap
index 8b099633a..8da1b6ca8 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap
@@ -44,8 +44,8 @@ doggos: { number_of_documents: 0, field_distribution: {} }
[timestamp] [1,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"cattos":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"cattos":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap
index e809b28cb..8ee0bfcef 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap
@@ -45,9 +45,9 @@ cattos: { number_of_documents: 0, field_distribution: {} }
[timestamp] [2,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"cattos":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexDeletion":1},"indexUids":{"doggos":1}}, stop reason: "task with id 2 deletes the index", }
+0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"cattos":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexDeletion":1},"indexUids":{"doggos":1}}, stop reason: "stopped after task with id 2 because it deletes the index", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/first.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/first.snap
index bf183802b..a7215f32c 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/first.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/first.snap
@@ -42,7 +42,7 @@ doggos: { number_of_documents: 0, field_distribution: {} }
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/fourth.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/fourth.snap
index 03f29dd0e..1c14b091f 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/fourth.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/fourth.snap
@@ -48,9 +48,9 @@ doggos: { number_of_documents: 0, field_distribution: {} }
[timestamp] [3,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-2 {uid: 2, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
+0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+2 {uid: 2, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
3 {uid: 3, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks", }
----------------------------------------------------------------------
### Batch to tasks mapping:
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/second.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/second.snap
index 2ff261d43..da91440ab 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/second.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/second.snap
@@ -44,8 +44,8 @@ doggos: { number_of_documents: 0, field_distribution: {} }
[timestamp] [1,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
+0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/third.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/third.snap
index c9663ca65..95bc1f7f5 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/third.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/third.snap
@@ -46,9 +46,9 @@ doggos: { number_of_documents: 0, field_distribution: {} }
[timestamp] [2,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-2 {uid: 2, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
+0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+2 {uid: 2, details: {"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_a.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_a.snap
index bc6e9b8b4..4878bbe28 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_a.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_a.snap
@@ -44,7 +44,7 @@ a: { number_of_documents: 0, field_distribution: {} }
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_b.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_b.snap
index b6af0c6a6..5a851f373 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_b.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_b.snap
@@ -47,8 +47,8 @@ b: { number_of_documents: 0, field_distribution: {} }
[timestamp] [1,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_c.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_c.snap
index d3f714ace..dad7609d2 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_c.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_c.snap
@@ -50,9 +50,9 @@ c: { number_of_documents: 0, field_distribution: {} }
[timestamp] [2,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_d.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_d.snap
index 15a1cf2ae..ee0a12692 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_d.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_d.snap
@@ -53,10 +53,10 @@ d: { number_of_documents: 0, field_distribution: {} }
[timestamp] [3,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", }
-3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
+3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_processed.snap
index 6b9899418..39d1b3339 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_processed.snap
@@ -60,11 +60,11 @@ d: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", }
-3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", }
-4 {uid: 4, details: {"swaps":[{"indexes":["a","b"]},{"indexes":["c","d"]}]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "task with id 4 of type `indexSwap` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
+3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
+4 {uid: 4, details: {"swaps":[{"indexes":["a","b"]},{"indexes":["c","d"]}]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "created batch containing only task with id 4 of type `indexSwap` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_registered.snap
index 3091b061b..5d292fe21 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_registered.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_registered.snap
@@ -56,10 +56,10 @@ d: { number_of_documents: 0, field_distribution: {} }
[timestamp] [3,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", }
-3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
+3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/second_swap_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/second_swap_processed.snap
index 20e9d1076..9327015c4 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/second_swap_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/second_swap_processed.snap
@@ -62,12 +62,12 @@ d: { number_of_documents: 0, field_distribution: {} }
[timestamp] [5,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", }
-3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", }
-4 {uid: 4, details: {"swaps":[{"indexes":["a","b"]},{"indexes":["c","d"]}]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "task with id 4 of type `indexSwap` cannot be batched", }
-5 {uid: 5, details: {"swaps":[{"indexes":["a","c"]}]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "task with id 5 of type `indexSwap` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
+3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
+4 {uid: 4, details: {"swaps":[{"indexes":["a","b"]},{"indexes":["c","d"]}]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "created batch containing only task with id 4 of type `indexSwap` that cannot be batched with any other task.", }
+5 {uid: 5, details: {"swaps":[{"indexes":["a","c"]}]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "created batch containing only task with id 5 of type `indexSwap` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/third_empty_swap_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/third_empty_swap_processed.snap
index 27e42139c..f85735397 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/third_empty_swap_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/third_empty_swap_processed.snap
@@ -66,13 +66,13 @@ d: { number_of_documents: 0, field_distribution: {} }
[timestamp] [6,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", }
-3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", }
-4 {uid: 4, details: {"swaps":[{"indexes":["a","b"]},{"indexes":["c","d"]}]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "task with id 4 of type `indexSwap` cannot be batched", }
-5 {uid: 5, details: {"swaps":[{"indexes":["a","c"]}]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "task with id 5 of type `indexSwap` cannot be batched", }
-6 {uid: 6, details: {"swaps":[]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "task with id 6 of type `indexSwap` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
+3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
+4 {uid: 4, details: {"swaps":[{"indexes":["a","b"]},{"indexes":["c","d"]}]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "created batch containing only task with id 4 of type `indexSwap` that cannot be batched with any other task.", }
+5 {uid: 5, details: {"swaps":[{"indexes":["a","c"]}]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "created batch containing only task with id 5 of type `indexSwap` that cannot be batched with any other task.", }
+6 {uid: 6, details: {"swaps":[]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "created batch containing only task with id 6 of type `indexSwap` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/two_swaps_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/two_swaps_registered.snap
index aa8fb4dc1..46d70dceb 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/two_swaps_registered.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/two_swaps_registered.snap
@@ -58,10 +58,10 @@ d: { number_of_documents: 0, field_distribution: {} }
[timestamp] [3,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", }
-3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
+3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/after_the_index_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/after_the_index_creation.snap
index 15a1cf2ae..ee0a12692 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/after_the_index_creation.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/after_the_index_creation.snap
@@ -53,10 +53,10 @@ d: { number_of_documents: 0, field_distribution: {} }
[timestamp] [3,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", }
-3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
+3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/first_swap_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/first_swap_failed.snap
index 1e4a4a6f3..da9340f3b 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/first_swap_failed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/first_swap_failed.snap
@@ -61,11 +61,11 @@ d: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", }
-3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", }
-4 {uid: 4, details: {"swaps":[{"indexes":["a","b"]},{"indexes":["c","e"]},{"indexes":["d","f"]}]}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "task with id 4 of type `indexSwap` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
+3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
+4 {uid: 4, details: {"swaps":[{"indexes":["a","b"]},{"indexes":["c","e"]},{"indexes":["d","f"]}]}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexSwap":1},"indexUids":{}}, stop reason: "created batch containing only task with id 4 of type `indexSwap` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/initial_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/initial_tasks_processed.snap
index 15a1cf2ae..ee0a12692 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/initial_tasks_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/initial_tasks_processed.snap
@@ -53,10 +53,10 @@ d: { number_of_documents: 0, field_distribution: {} }
[timestamp] [3,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", }
-1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", }
-2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", }
-3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", }
+0 {uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"a":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+1 {uid: 1, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"b":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
+2 {uid: 2, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"c":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
+3 {uid: 3, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"d":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap
index d5143a4a3..f6182e515 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap
@@ -40,7 +40,7 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} }
[timestamp] [2,3,]
----------------------------------------------------------------------
### All Batches:
-1 {uid: 1, details: {"matchedTasks":2,"deletedTasks":1,"originalFilter":"test_query&test_query"}, stats: {"totalNbTasks":2,"status":{"succeeded":2},"types":{"taskDeletion":2},"indexUids":{}}, stop reason: "a batch of tasks of type `taskDeletion` cannot be batched with any other type of task", }
+1 {uid: 1, details: {"matchedTasks":2,"deletedTasks":1,"originalFilter":"test_query&test_query"}, stats: {"totalNbTasks":2,"status":{"succeeded":2},"types":{"taskDeletion":2},"indexUids":{}}, stop reason: "stopped after the last task of type `taskDeletion` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
1 [2,3,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/task_deletion_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/task_deletion_processed.snap
index 414d2e488..cae9c296b 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/task_deletion_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/task_deletion_processed.snap
@@ -38,7 +38,7 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} }
[timestamp] [2,]
----------------------------------------------------------------------
### All Batches:
-1 {uid: 1, details: {"matchedTasks":1,"deletedTasks":1,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"taskDeletion":1},"indexUids":{}}, stop reason: "a batch of tasks of type `taskDeletion` cannot be batched with any other type of task", }
+1 {uid: 1, details: {"matchedTasks":1,"deletedTasks":1,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"taskDeletion":1},"indexUids":{}}, stop reason: "stopped after the last task of type `taskDeletion` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
1 [2,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_done.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_done.snap
index 0a24f1993..3b89fe1e7 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_done.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_done.snap
@@ -43,7 +43,7 @@ doggo [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"matchedTasks":2,"deletedTasks":0,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"taskDeletion":1},"indexUids":{}}, stop reason: "a batch of tasks of type `taskDeletion` cannot be batched with any other type of task", }
+0 {uid: 0, details: {"matchedTasks":2,"deletedTasks":0,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"taskDeletion":1},"indexUids":{}}, stop reason: "stopped after the last task of type `taskDeletion` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [3,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_processing.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_processing.snap
index 33b65bfe4..d8abc1314 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_processing.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_processing.snap
@@ -4,7 +4,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
### Autobatching Enabled = true
### Processing batch Some(0):
[3,]
-{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"taskDeletion":1},"indexUids":{}}, stop reason: "a batch of tasks of type `taskDeletion` cannot be batched with any other type of task", }
+{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"taskDeletion":1},"indexUids":{}}, stop reason: "stopped after the last task of type `taskDeletion` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_scheduler_doesnt_run_with_zero_batched_tasks/after_restart.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_scheduler_doesnt_run_with_zero_batched_tasks/after_restart.snap
new file mode 100644
index 000000000..1dde1a394
--- /dev/null
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_scheduler_doesnt_run_with_zero_batched_tasks/after_restart.snap
@@ -0,0 +1,63 @@
+---
+source: crates/index-scheduler/src/scheduler/test.rs
+---
+### Autobatching Enabled = true
+### Processing batch None:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }}
+----------------------------------------------------------------------
+### Status:
+enqueued []
+succeeded [0,]
+----------------------------------------------------------------------
+### Kind:
+"indexCreation" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+doggos [0,]
+----------------------------------------------------------------------
+### Index Mapper:
+doggos: { number_of_documents: 0, field_distribution: {} }
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### Started At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### Finished At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### All Batches:
+0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", }
+----------------------------------------------------------------------
+### Batch to tasks mapping:
+0 [0,]
+----------------------------------------------------------------------
+### Batches Status:
+succeeded [0,]
+----------------------------------------------------------------------
+### Batches Kind:
+"indexCreation" [0,]
+----------------------------------------------------------------------
+### Batches Index Tasks:
+doggos [0,]
+----------------------------------------------------------------------
+### Batches Enqueued At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### Batches Started At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### Batches Finished At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_scheduler_doesnt_run_with_zero_batched_tasks/registered_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_scheduler_doesnt_run_with_zero_batched_tasks/registered_task.snap
new file mode 100644
index 000000000..dd1d76f55
--- /dev/null
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_scheduler_doesnt_run_with_zero_batched_tasks/registered_task.snap
@@ -0,0 +1,51 @@
+---
+source: crates/index-scheduler/src/scheduler/test.rs
+---
+### Autobatching Enabled = true
+### Processing batch None:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }}
+----------------------------------------------------------------------
+### Status:
+enqueued [0,]
+----------------------------------------------------------------------
+### Kind:
+"indexCreation" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+doggos [0,]
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### Started At:
+----------------------------------------------------------------------
+### Finished At:
+----------------------------------------------------------------------
+### All Batches:
+----------------------------------------------------------------------
+### Batch to tasks mapping:
+----------------------------------------------------------------------
+### Batches Status:
+----------------------------------------------------------------------
+### Batches Kind:
+----------------------------------------------------------------------
+### Batches Index Tasks:
+----------------------------------------------------------------------
+### Batches Enqueued At:
+----------------------------------------------------------------------
+### Batches Started At:
+----------------------------------------------------------------------
+### Batches Finished At:
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap
index d9d8b0724..a52f18079 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap
index 35eb3f162..b99e15852 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap
index b83fa60f1..7ad5046e1 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap
@@ -37,7 +37,7 @@ doggos [0,1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"providedIds":2,"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 has different index creation rules as in the batch", }
+0 {uid: 0, details: {"providedIds":2,"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its index creation rules differ from the ones from the batch", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_last_successful_addition.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_last_successful_addition.snap
index f97b536f0..9d94cffaf 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_last_successful_addition.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_last_successful_addition.snap
@@ -41,7 +41,7 @@ doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "
[timestamp] [1,]
----------------------------------------------------------------------
### All Batches:
-0 {uid: 0, details: {"providedIds":2,"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 has different index creation rules as in the batch", }
+0 {uid: 0, details: {"providedIds":2,"deletedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentDeletion":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its index creation rules differ from the ones from the batch", }
1 {uid: 1, details: {"receivedDocuments":3,"indexedDocuments":3}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks", }
----------------------------------------------------------------------
### Batch to tasks mapping:
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap index 0f9d93068..f8caaa995 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap @@ -58,7 +58,7 @@ doggos: { number_of_documents: 10, field_distribution: {"doggo": 10, "id": 10} } [timestamp] [1,2,3,4,5,6,7,8,9,10,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } 1 {uid: 1, details: {"receivedDocuments":10,"indexedDocuments":10}, stats: {"totalNbTasks":10,"status":{"succeeded":10},"types":{"documentAdditionOrUpdate":10},"indexUids":{"doggos":10}}, stop reason: "batched all enqueued tasks", } ---------------------------------------------------------------------- ### Batch to tasks mapping: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap index df75e6cf0..d987d66c0 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap @@ -56,7 +56,7 @@ doggos: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap index f8bcf6646..d1369460f 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap @@ -35,7 +35,7 @@ doggos: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap index 091346cc0..136777fcf 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap @@ -56,7 +56,7 @@ doggos: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap index 8c30b10fa..0b4fc96b5 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap @@ -76,16 +76,16 @@ doggos: { number_of_documents: 10, field_distribution: {"doggo": 10, "id": 10} } [timestamp] [10,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task 
with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -5 {uid: 5, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -6 {uid: 6, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -7 {uid: 7, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -8 {uid: 8, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -9 {uid: 9, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +5 {uid: 5, details: 
{"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +6 {uid: 6, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +7 {uid: 7, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +8 {uid: 8, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +9 {uid: 9, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } 10 {uid: 10, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks", } ---------------------------------------------------------------------- ### Batch to tasks mapping: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap index 34e09660d..d938ca288 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap @@ -66,12 +66,12 @@ doggos: { number_of_documents: 5, field_distribution: {"doggo": 5, "id": 5} } [timestamp] [5,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } -1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: 
{"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -5 {uid: 5, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } +1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +5 {uid: 5, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap index 38081c7d7..2d936ba68 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap @@ -35,7 +35,7 @@ doggos: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git 
a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap index 2b5a673d6..fc16063e7 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap @@ -70,15 +70,15 @@ doggos [0,1,2,3,4,5,6,7,8,9,] [timestamp] [9,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -5 {uid: 5, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -6 {uid: 6, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -7 {uid: 7, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -8 {uid: 8, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } +0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } 
+2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +5 {uid: 5, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +6 {uid: 6, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +7 {uid: 7, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +8 {uid: 8, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } 9 {uid: 9, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks", } ---------------------------------------------------------------------- ### Batch to tasks mapping: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap index e2ecfcdab..19d45a0d3 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap @@ -60,11 +60,11 @@ doggos [0,1,2,3,4,5,6,7,8,9,] [timestamp] [4,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: 
{"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } -4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", } +0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } +4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap index db8192fc6..d007e673a 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap @@ -56,7 +56,7 @@ doggos: { number_of_documents: 9, field_distribution: {"doggo": 9, "id": 9} } [timestamp] [1,2,3,4,5,6,7,8,9,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 has different index creation rules as in the batch", } +0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its index creation rules differ from the ones from the batch", } 1 {uid: 1, details: 
{"receivedDocuments":9,"indexedDocuments":9}, stats: {"totalNbTasks":9,"status":{"succeeded":9},"types":{"documentAdditionOrUpdate":9},"indexUids":{"doggos":9}}, stop reason: "batched all enqueued tasks", } ---------------------------------------------------------------------- ### Batch to tasks mapping: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap index 2e23d7cbf..57f7be034 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap @@ -52,7 +52,7 @@ doggos [0,1,2,3,4,5,6,7,8,9,] [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 has different index creation rules as in the batch", } +0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its index creation rules differ from the ones from the batch", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap index 5cf951bfd..6add8a2a5 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap @@ -56,7 +56,7 @@ doggos: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap index 274d93f7e..197ed0679 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap @@ -58,7 +58,7 @@ doggos: { number_of_documents: 10, field_distribution: {"doggo": 10, "id": 10} } [timestamp] [1,2,3,4,5,6,7,8,9,10,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } 1 {uid: 1, details: {"receivedDocuments":10,"indexedDocuments":10}, stats: {"totalNbTasks":10,"status":{"succeeded":10},"types":{"documentAdditionOrUpdate":10},"indexUids":{"doggos":10}}, stop reason: "batched all enqueued tasks", } ---------------------------------------------------------------------- ### Batch to tasks mapping: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap index f8bcf6646..d1369460f 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap @@ -35,7 +35,7 @@ doggos: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggos":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap index d8090b209..f0807b5fd 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap @@ 
-50,9 +50,9 @@ doggos: { number_of_documents: 2, field_distribution: {"doggo": 2, "id": 2} } [timestamp] [4,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":2,"indexedDocuments":0}, stats: {"totalNbTasks":2,"status":{"failed":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, stop reason: "primary key `id` in task with id 2 is different from the primary key of the batch `bork`", } -1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 3 is different from the primary key of the batch `id`", } -2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 3 is different from the primary key of the index `id`", } +0 {uid: 0, details: {"receivedDocuments":2,"indexedDocuments":0}, stats: {"totalNbTasks":2,"status":{"failed":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, stop reason: "stopped batching before task with id 2 because its primary key `id` is different from the primary key of the batch `bork`", } +1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 3 because its primary key `bork` is different from the primary key of the batch `id`", } +2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 3 because its primary key `bork` is different from the primary key of the index `id`", } 3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks", } ---------------------------------------------------------------------- ### Batch to tasks mapping: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap index 9707adba1..17a6ce83e 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap @@ -43,7 +43,7 @@ doggos: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,1,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":2,"indexedDocuments":0}, stats: {"totalNbTasks":2,"status":{"failed":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, stop reason: "primary key `id` in task with id 2 is different from the primary key of the batch `bork`", } +0 {uid: 0, details: {"receivedDocuments":2,"indexedDocuments":0}, 
stats: {"totalNbTasks":2,"status":{"failed":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, stop reason: "stopped batching before task with id 2 because its primary key `id` is different from the primary key of the batch `bork`", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,1,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap index df012bc42..7f5c4bfc7 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap @@ -48,9 +48,9 @@ doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} } [timestamp] [3,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":2,"indexedDocuments":0}, stats: {"totalNbTasks":2,"status":{"failed":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, stop reason: "primary key `id` in task with id 2 is different from the primary key of the batch `bork`", } -1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 3 is different from the primary key of the batch `id`", } -2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 3 is different from the primary key of the index `id`", } +0 {uid: 0, details: {"receivedDocuments":2,"indexedDocuments":0}, stats: {"totalNbTasks":2,"status":{"failed":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, stop reason: "stopped batching before task with id 2 because its primary key `id` is different from the primary key of the batch `bork`", } +1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 3 because its primary key `bork` is different from the primary key of the batch `id`", } +2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 3 because its primary key `bork` is different from the primary key of the index `id`", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,1,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap index aae598a0e..cfe41f168 100644 --- 
a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap @@ -46,8 +46,8 @@ doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} } [timestamp] [2,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":2,"indexedDocuments":0}, stats: {"totalNbTasks":2,"status":{"failed":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, stop reason: "primary key `id` in task with id 2 is different from the primary key of the batch `bork`", } -1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 3 is different from the primary key of the batch `id`", } +0 {uid: 0, details: {"receivedDocuments":2,"indexedDocuments":0}, stats: {"totalNbTasks":2,"status":{"failed":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, stop reason: "stopped batching before task with id 2 because its primary key `id` is different from the primary key of the batch `bork`", } +1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 3 because its primary key `bork` is different from the primary key of the batch `id`", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,1,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap index 4c36fcd06..0f7aac17a 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap @@ -39,7 +39,7 @@ doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} } [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 1 is different from the primary key of the batch `id`", } +0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its primary key `bork` is different from the primary key of the batch `id`", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git 
a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap
index f7033bee5..2a714664f 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap
@@ -42,8 +42,8 @@ doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
 [timestamp] [1,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 1 is different from the primary key of the batch `id`", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 1 is different from the primary key of the index `id`", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its primary key `bork` is different from the primary key of the batch `id`", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its primary key `bork` is different from the primary key of the index `id`", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap
index c3360c7cf..3c0cb0add 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap
@@ -44,9 +44,9 @@ doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
 [timestamp] [2,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 1 is different from the primary key of the batch `id`", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 1 is different from the primary key of the index `id`", }
-2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bloup` in task with id 2 is different from the primary key of the index `id`", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its primary key `bork` is different from the primary key of the batch `id`", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its primary key `bork` is different from the primary key of the index `id`", }
+2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 2 because its primary key `bloup` is different from the primary key of the index `id`", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap
index 706f71e53..67637d5ef 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap
@@ -39,7 +39,7 @@ doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 1 is different from the primary key of the batch `id`", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its primary key `bork` is different from the primary key of the batch `id`", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap
index 0823ebbbf..b552f1abf 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap
@@ -42,8 +42,8 @@ doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
 [timestamp] [1,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 1 is different from the primary key of the batch `id`", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 1 is different from the primary key of the index `id`", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its primary key `bork` is different from the primary key of the batch `id`", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its primary key `bork` is different from the primary key of the index `id`", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap
index fc58f2b52..0c02c8165 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap
@@ -50,8 +50,8 @@ doggos: { number_of_documents: 4, field_distribution: {"doggo": 4, "paw": 4} }
 [timestamp] [2,3,4,5,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 is setting the `bork` primary key but cannot interfere with primary key guessing of the batch", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `paw` in task with id 2 is different from the primary key of the batch `bork`", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because it is setting the `bork` primary key and it would interfere with primary key guessing of the batch", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 2 because its primary key `paw` is different from the primary key of the batch `bork`", }
 2 {uid: 2, details: {"receivedDocuments":4,"indexedDocuments":4}, stats: {"totalNbTasks":4,"status":{"succeeded":4},"types":{"documentAdditionOrUpdate":4},"indexUids":{"doggos":4}}, stop reason: "batched all enqueued tasks", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap
index d8271492c..1268e11ef 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap
@@ -45,7 +45,7 @@ doggos: { number_of_documents: 0, field_distribution: {} }
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 is setting the `bork` primary key but cannot interfere with primary key guessing of the batch", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because it is setting the `bork` primary key and it would interfere with primary key guessing of the batch", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap
index 484c9ada0..20fdfdcc6 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap
@@ -47,8 +47,8 @@ doggos: { number_of_documents: 0, field_distribution: {} }
 [timestamp] [1,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 is setting the `bork` primary key but cannot interfere with primary key guessing of the batch", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `paw` in task with id 2 is different from the primary key of the batch `bork`", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because it is setting the `bork` primary key and it would interfere with primary key guessing of the batch", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 2 because its primary key `paw` is different from the primary key of the batch `bork`", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap
index 5c0046af1..5b2946de9 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap
@@ -50,8 +50,8 @@ doggos: { number_of_documents: 5, field_distribution: {"doggo": 5, "doggoid": 5}
 [timestamp] [2,3,4,5,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 is setting the `bork` primary key but cannot interfere with primary key guessing of the batch", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 1 is different from the primary key of the index `doggoid`", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because it is setting the `bork` primary key and it would interfere with primary key guessing of the batch", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its primary key `bork` is different from the primary key of the index `doggoid`", }
 2 {uid: 2, details: {"receivedDocuments":4,"indexedDocuments":4}, stats: {"totalNbTasks":4,"status":{"succeeded":4},"types":{"documentAdditionOrUpdate":4},"indexUids":{"doggos":4}}, stop reason: "batched all enqueued tasks", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap
index 5bdbe2943..c71bf9097 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap
@@ -45,7 +45,7 @@ doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "doggoid": 1}
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 is setting the `bork` primary key but cannot interfere with primary key guessing of the batch", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because it is setting the `bork` primary key and it would interfere with primary key guessing of the batch", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap
index 849eddebe..5d67361b0 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap
@@ -48,8 +48,8 @@ doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "doggoid": 1}
 [timestamp] [1,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 is setting the `bork` primary key but cannot interfere with primary key guessing of the batch", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "primary key `bork` in task with id 1 is different from the primary key of the index `doggoid`", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because it is setting the `bork` primary key and it would interfere with primary key guessing of the batch", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":0}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped batching before task with id 1 because its primary key `bork` is different from the primary key of the index `doggoid`", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/all_tasks_processed.snap
index 6ac284c38..d7218ab46 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/all_tasks_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/all_tasks_processed.snap
@@ -71,15 +71,15 @@ doggos: { number_of_documents: 10, field_distribution: {"doggo": 10, "id": 10} }
 [timestamp] [9,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-5 {uid: 5, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-6 {uid: 6, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-7 {uid: 7, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-8 {uid: 8, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+5 {uid: 5, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+6 {uid: 6, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+7 {uid: 7, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+8 {uid: 8, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
 9 {uid: 9, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/five_tasks_processed.snap
index ac239f3d6..2ffdfc986 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/five_tasks_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/five_tasks_processed.snap
@@ -61,11 +61,11 @@ doggos: { number_of_documents: 5, field_distribution: {"doggo": 5, "id": 5} }
 [timestamp] [4,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/all_tasks_processed.snap
index 43d09fafa..4987b26f7 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/all_tasks_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/all_tasks_processed.snap
@@ -71,15 +71,15 @@ doggos: { number_of_documents: 10, field_distribution: {"doggo": 10, "id": 10} }
 [timestamp] [9,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-5 {uid: 5, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-6 {uid: 6, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-7 {uid: 7, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-8 {uid: 8, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+5 {uid: 5, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+6 {uid: 6, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+7 {uid: 7, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+8 {uid: 8, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
 9 {uid: 9, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/five_tasks_processed.snap
index bf0da7815..8b73a24ee 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/five_tasks_processed.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/five_tasks_processed.snap
@@ -61,11 +61,11 @@ doggos: { number_of_documents: 5, field_distribution: {"doggo": 5, "id": 5} }
 [timestamp] [4,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
-4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "reached configured batch limit of 1 tasks", }
+0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
+4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched up to configured batch limit of 1 tasks", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap
index b90d5944a..12e03a28b 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap
index 58bf78290..2ea2ebb17 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap
index 90ac17702..a2a263b6f 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap
index 10f87d389..29fc6abf4 100644
--- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap
+++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false,
allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap index 35bd9dee9..ae943bf48 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: 
Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap index ec8f387f0..9ada7580a 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: 
NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap index 4f60fd009..96d93de51 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, 
ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: @@ -40,7 +40,7 @@ doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, " [timestamp] [1,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"filterableAttributes":["catto"]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"settingsUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 is a document operation in a batch of settings changes", } +0 {uid: 0, details: {"filterableAttributes":["catto"]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"settingsUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped before task with id 1 because it is a document operation which cannot be batched with settings changes", } 1 {uid: 1, details: {"receivedDocuments":3,"indexedDocuments":3}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks", } ---------------------------------------------------------------------- ### Batch to tasks mapping: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap index d502215b5..76a77e5c0 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), 
filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: @@ -38,7 +38,7 @@ doggos: { number_of_documents: 0, field_distribution: {} } [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"filterableAttributes":["catto"]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"settingsUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 is a document operation in a batch of settings changes", } +0 {uid: 0, details: {"filterableAttributes":["catto"]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"settingsUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped before task with id 1 because it 
is a document operation which cannot be batched with settings changes", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap index 3bfeed9c8..422bed51f 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: 
NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} 3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} @@ -52,7 +52,7 @@ doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} } [timestamp] [2,3,4,5,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"filterableAttributes":["catto"]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"settingsUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 is a document operation in a batch of settings changes", } +0 {uid: 0, details: {"filterableAttributes":["catto"]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"settingsUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped before task with id 1 because it is a document operation which cannot be batched with settings changes", } 1 {uid: 1, details: {"receivedDocuments":3,"indexedDocuments":3}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks", } 2 {uid: 2, details: {"providedIds":1,"deletedDocuments":2,"originalFilter":"true&\"id = 2\"&\"catto EXISTS\""}, stats: {"totalNbTasks":4,"status":{"succeeded":2,"failed":2},"types":{"documentDeletion":4},"indexUids":{"doggos":4}}, stop reason: "batched all enqueued tasks", } ---------------------------------------------------------------------- diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap index 8b55d9796..d8996f82c 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, 
separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} 3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} @@ -49,7 +49,7 @@ doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, " [timestamp] [1,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"filterableAttributes":["catto"]}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"settingsUpdate":1},"indexUids":{"doggos":1}}, stop reason: "task with id 1 is a document operation in a batch of settings changes", } +0 {uid: 0, details: {"filterableAttributes":["catto"]}, stats: 
{"totalNbTasks":1,"status":{"succeeded":1},"types":{"settingsUpdate":1},"indexUids":{"doggos":1}}, stop reason: "stopped before task with id 1 because it is a document operation which cannot be batched with settings changes", } 1 {uid: 1, details: {"receivedDocuments":3,"indexedDocuments":3}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, stop reason: "batched all enqueued tasks", } ---------------------------------------------------------------------- ### Batch to tasks mapping: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap index 0ba3ef598..e7b06eb31 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), 
filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap index 7db4a4edf..8feeaf990 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap @@ -34,7 +34,7 @@ catto [0,] [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap index 77a444451..201680d7a 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap @@ -34,7 +34,7 @@ catto [0,] [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 0 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 0 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks 
mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_processing_everything.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_processing_everything.snap index 178ec8166..0b5d4409d 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_processing_everything.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_processing_everything.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} @@ -57,11 +57,11 @@ girafo: { number_of_documents: 0, field_distribution: {} } [timestamp] [4,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "a batch of tasks of type `upgradeDatabase` cannot be batched with any other type of task", } -1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", } -2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", } -3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", } -4 {uid: 4, details: {"primaryKey":"leaves"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"girafo":1}}, stop reason: "task with id 4 of type `indexCreation` cannot be batched", } +0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", } +1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with 
any other task.", } +2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", } +3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", } +4 {uid: 4, details: {"primaryKey":"leaves"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"girafo":1}}, stop reason: "created batch containing only task with id 4 of type `indexCreation` that cannot be batched with any other task.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_removing_the_upgrade_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_removing_the_upgrade_tasks.snap index 26984f2e5..fb682053c 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_removing_the_upgrade_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_removing_the_upgrade_tasks.snap @@ -58,11 +58,11 @@ girafo: { number_of_documents: 0, field_distribution: {} } [timestamp] [5,] ---------------------------------------------------------------------- ### All Batches: -1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "task with id 1 of type `indexCreation` cannot be batched", } -2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 2 of type `indexCreation` cannot be batched", } -3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "task with id 3 of type `indexCreation` cannot be batched", } -4 {uid: 4, details: {"primaryKey":"leaves"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"girafo":1}}, stop reason: "task with id 4 of type `indexCreation` cannot be batched", } -5 {uid: 5, details: {"matchedTasks":1,"deletedTasks":1,"originalFilter":"types=upgradeDatabase"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"taskDeletion":1},"indexUids":{}}, stop reason: "a batch of tasks of type `taskDeletion` cannot be batched with any other type of task", } +1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", } +2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", } +3 {uid: 3, details: {"primaryKey":"bone"}, stats: 
{"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", } +4 {uid: 4, details: {"primaryKey":"leaves"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"girafo":1}}, stop reason: "created batch containing only task with id 4 of type `indexCreation` that cannot be batched with any other task.", } +5 {uid: 5, details: {"matchedTasks":1,"deletedTasks":1,"originalFilter":"types=upgradeDatabase"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"taskDeletion":1},"indexUids":{}}, stop reason: "stopped after the last task of type `taskDeletion` because they cannot be batched with tasks of any other type.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 1 [1,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/register_automatic_upgrade_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/register_automatic_upgrade_task.snap index 37bb9d78e..0bfb9c6da 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/register_automatic_upgrade_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/register_automatic_upgrade_task.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/registered_a_task_while_the_upgrade_task_is_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/registered_a_task_while_the_upgrade_task_is_enqueued.snap index fd8656c42..8d374479b 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/registered_a_task_while_the_upgrade_task_is_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/registered_a_task_while_the_upgrade_task_is_enqueued.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed.snap index 899a507f5..9fc28abbe 100644 --- 
a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} ---------------------------------------------------------------------- ### Status: @@ -37,7 +37,7 @@ catto [1,] [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "a batch of tasks of type `upgradeDatabase` cannot be batched with any other type of task", } +0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed_again.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed_again.snap index e3244fc28..33ddf7193 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed_again.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed_again.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") 
}, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} ---------------------------------------------------------------------- @@ -40,7 +40,7 @@ doggo [2,] [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "a batch of tasks of type `upgradeDatabase` cannot be batched with any other type of task", } +0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_succeeded.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_succeeded.snap index 9d78f6bbf..05d366d1e 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_succeeded.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_succeeded.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} @@ -43,7 +43,7 @@ doggo [2,3,] [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "a batch of tasks of type `upgradeDatabase` cannot be batched with any other type of task", } +0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/test.rs b/crates/index-scheduler/src/scheduler/test.rs index 84112de08..e9f21dfe4 100644 --- a/crates/index-scheduler/src/scheduler/test.rs +++ b/crates/index-scheduler/src/scheduler/test.rs @@ -3,11 +3,11 @@ use std::collections::BTreeMap; use big_s::S; use meili_snap::{json_string, snapshot}; use meilisearch_auth::AuthFilter; -use 
meilisearch_types::milli::index::IndexEmbeddingConfig; use meilisearch_types::milli::update::IndexDocumentsMethod::*; use meilisearch_types::milli::{self}; use meilisearch_types::settings::SettingEmbeddingSettings; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; +use milli::vector::db::IndexEmbeddingConfig; use roaring::RoaringBitmap; use crate::insta_snapshot::snapshot_index_scheduler; @@ -690,11 +690,20 @@ fn test_settings_update() { let index = index_scheduler.index("doggos").unwrap(); let rtxn = index.read_txn().unwrap(); - let configs = index.embedding_configs(&rtxn).unwrap(); - let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap(); + let embedders = index.embedding_configs(); + let configs = embedders.embedding_configs(&rtxn).unwrap(); + let IndexEmbeddingConfig { name, config, fragments } = configs.first().unwrap(); + let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap(); + insta::assert_snapshot!(info.embedder_id, @"0"); + insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>"); + insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>"); insta::assert_snapshot!(name, @"default"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); insta::assert_json_snapshot!(config.embedder_options); + insta::assert_debug_snapshot!(fragments, @r###" + FragmentConfigs( + [], + ) + "###); } #[test] @@ -732,6 +741,7 @@ fn basic_get_stats() { "documentDeletion": 0, "documentEdition": 0, "dumpCreation": 0, + "export": 0, "indexCreation": 3, "indexDeletion": 0, "indexSwap": 0, @@ -765,6 +775,7 @@ "documentDeletion": 0, "documentEdition": 0, "dumpCreation": 0, + "export": 0, "indexCreation": 3, "indexDeletion": 0, "indexSwap": 0, @@ -805,6 +816,7 @@ "documentDeletion": 0, "documentEdition": 0, "dumpCreation": 0, + "export": 0, "indexCreation": 3, "indexDeletion": 0, "indexSwap": 0, @@ -846,6 +858,7 @@ "documentDeletion": 0, "documentEdition": 0, "dumpCreation": 0, + "export": 0, "indexCreation": 3, "indexDeletion": 0, "indexSwap": 0, @@ -894,7 +907,7 @@ fn create_and_list_index() { let err = index_scheduler.index("kefir").map(|_| ()).unwrap_err(); snapshot!(err, @"Index `kefir` not found."); - let empty = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap(); + let empty = index_scheduler.paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap(); snapshot!(format!("{empty:?}"), @"(0, [])"); // After advancing just once the index should've been created, the wtxn has been released and committed handle.advance_till([InsideProcessBatch]); index_scheduler.index("kefir").unwrap(); - let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap(); + let list = index_scheduler.paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap(); snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###" [ 1, @@ -929,3 +942,30 @@ fn create_and_list_index() { ] "###); } + +#[test] +fn test_scheduler_doesnt_run_with_zero_batched_tasks() { + let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { + config.max_number_of_batched_tasks = 0; + None + }); + + handle.scheduler_is_down(); + + // Register a task + 
index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_task"); + + handle.scheduler_is_down(); + + // If we restart the scheduler, it should run properly. + let (index_scheduler, mut handle) = handle.restart(index_scheduler, true, vec![], |_| None); + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_restart"); +} diff --git a/crates/index-scheduler/src/scheduler/test_embedders.rs b/crates/index-scheduler/src/scheduler/test_embedders.rs index 772aa1520..a9b920bd2 100644 --- a/crates/index-scheduler/src/scheduler/test_embedders.rs +++ b/crates/index-scheduler/src/scheduler/test_embedders.rs @@ -3,13 +3,14 @@ use std::collections::BTreeMap; use big_s::S; use insta::assert_json_snapshot; use meili_snap::{json_string, snapshot}; -use meilisearch_types::milli::index::IndexEmbeddingConfig; use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::vector::settings::EmbeddingSettings; +use meilisearch_types::milli::vector::SearchQuery; use meilisearch_types::milli::{self, obkv_to_json}; use meilisearch_types::settings::{SettingEmbeddingSettings, Settings, Unchecked}; use meilisearch_types::tasks::KindWithContent; use milli::update::IndexDocumentsMethod::*; +use milli::vector::db::IndexEmbeddingConfig; use crate::insta_snapshot::snapshot_index_scheduler; use crate::test_utils::read_json; @@ -85,28 +86,51 @@ fn import_vectors() { let index = index_scheduler.index("doggos").unwrap(); let rtxn = index.read_txn().unwrap(); - let configs = index.embedding_configs(&rtxn).unwrap(); + let embedders = index.embedding_configs(); + let configs = embedders.embedding_configs(&rtxn).unwrap(); // for consistency with the below #[allow(clippy::get_first)] - let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } = + let IndexEmbeddingConfig { name, config: fakerest_config, fragments } = configs.get(0).unwrap(); + let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap(); + insta::assert_snapshot!(info.embedder_id, @"0"); + insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>"); + insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>"); insta::assert_snapshot!(name, @"A_fakerest"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + insta::assert_debug_snapshot!(fragments, @r###" + FragmentConfigs( + [], + ) + "###); insta::assert_json_snapshot!(fakerest_config.embedder_options); let fakerest_name = name.clone(); - let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } = + let IndexEmbeddingConfig { name, config: simple_hf_config, fragments } = configs.get(1).unwrap(); + let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap(); + insta::assert_snapshot!(info.embedder_id, @"1"); + insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>"); + insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>"); insta::assert_snapshot!(name, @"B_small_hf"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + insta::assert_debug_snapshot!(fragments, @r###" + FragmentConfigs( + [], + ) + "###); insta::assert_json_snapshot!(simple_hf_config.embedder_options); let simple_hf_name = name.clone(); let configs = 
index_scheduler.embedders("doggos".to_string(), configs).unwrap(); - let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); - let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap(); - let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap(); - let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap(); + let hf_runtime = configs.get(&simple_hf_name).unwrap(); + let hf_embedder = &hf_runtime.embedder; + let beagle_embed = hf_embedder + .embed_search(SearchQuery::Text("Intel the beagle best doggo"), None) + .unwrap(); + let lab_embed = + hf_embedder.embed_search(SearchQuery::Text("Max the lab best doggo"), None).unwrap(); + let patou_embed = hf_embedder + .embed_search(SearchQuery::Text("kefir the patou best doggo"), None) + .unwrap(); (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) }; @@ -166,22 +190,38 @@ fn import_vectors() { let rtxn = index.read_txn().unwrap(); // Ensure the document has been inserted into the relevant bitmap - let configs = index.embedding_configs(&rtxn).unwrap(); + let embedders = index.embedding_configs(); + let configs = embedders.embedding_configs(&rtxn).unwrap(); // for consistency with the below #[allow(clippy::get_first)] - let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = - configs.get(0).unwrap(); + let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap(); + let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap(); + insta::assert_snapshot!(info.embedder_id, @"0"); + insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>"); + insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>"); insta::assert_snapshot!(name, @"A_fakerest"); - insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); + insta::assert_debug_snapshot!(fragments, @r###" + FragmentConfigs( + [], + ) + "###); - let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap(); + let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap(); + let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap(); + insta::assert_snapshot!(info.embedder_id, @"1"); + insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>"); + insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>"); insta::assert_snapshot!(name, @"B_small_hf"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + insta::assert_debug_snapshot!(fragments, @r###" + FragmentConfigs( + [], + ) + "###); let embeddings = index.embeddings(&rtxn, 0).unwrap(); - assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true"); - assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); + assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == lab_embed, @"true"); + assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true"); let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); @@ -239,25 +279,41 @@ fn import_vectors() { let index = index_scheduler.index("doggos").unwrap(); let rtxn = index.read_txn().unwrap(); + let embedders = index.embedding_configs(); // Ensure the document has been inserted into the relevant bitmap - let configs = 
embedders.embedding_configs(&rtxn).unwrap(); // for consistency with the below #[allow(clippy::get_first)] - let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = - configs.get(0).unwrap(); + let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap(); + let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap(); + insta::assert_snapshot!(info.embedder_id, @"0"); + insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>"); + insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>"); insta::assert_snapshot!(name, @"A_fakerest"); - insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); + insta::assert_debug_snapshot!(fragments, @r###" + FragmentConfigs( + [], + ) + "###); - let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap(); + let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap(); + let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap(); + insta::assert_snapshot!(info.embedder_id, @"1"); + insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>"); + insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>"); insta::assert_snapshot!(name, @"B_small_hf"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + insta::assert_debug_snapshot!(fragments, @r###" + FragmentConfigs( + [], + ) + "###); let embeddings = index.embeddings(&rtxn, 0).unwrap(); // automatically changed to patou because set to regenerate - assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true"); + assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == patou_embed, @"true"); // remained beagle - assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); + assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true"); let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); @@ -399,8 +455,8 @@ fn import_vectors_first_and_embedder_later() { .collect::<Vec<_>>(); // all the vectors linked to the newly specified embedder have been removed // Only the unknown embedders stay in the document DB - snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###); - let conf = index.embedding_configs(&rtxn).unwrap(); + snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1,2,3]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4,5]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###); + let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap(); // even though we specified the vector for the ID 3, it shouldn't be marked // as user provided since we explicitly marked it as NOT user provided. 
snapshot!(format!("{conf:#?}"), @r###" @@ -426,19 +482,28 @@ fn import_vectors_first_and_embedder_later() { }, quantized: None, }, - user_provided: RoaringBitmap<[1, 2]>, + fragments: FragmentConfigs( + [], + ), }, ] "###); + let info = + index.embedding_configs().embedder_info(&rtxn, "my_doggo_embedder").unwrap().unwrap(); + insta::assert_snapshot!(info.embedder_id, @"0"); + + insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[1, 2, 3]>"); + insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[1, 2]>"); + let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["my_doggo_embedder"]; + let (embedding, _) = &embeddings["my_doggo_embedder"]; assert!(!embedding.is_empty(), "{embedding:?}"); // the document with the id 3 should keep its original embedding let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embeddings = &embeddings["my_doggo_embedder"]; + let (embeddings, _) = &embeddings["my_doggo_embedder"]; snapshot!(embeddings.len(), @"1"); assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); @@ -493,7 +558,7 @@ fn import_vectors_first_and_embedder_later() { "###); let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["my_doggo_embedder"]; + let (embedding, _) = &embeddings["my_doggo_embedder"]; assert!(!embedding.is_empty()); assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]); @@ -501,7 +566,7 @@ // the document with the id 4 should generate an embedding let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap(); let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["my_doggo_embedder"]; + let (embedding, _) = &embeddings["my_doggo_embedder"]; assert!(!embedding.is_empty()); } @@ -603,33 +668,35 @@ fn delete_document_containing_vector() { .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) .collect::<Vec<_>>(); snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###); - let conf = index.embedding_configs(&rtxn).unwrap(); + let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap(); snapshot!(format!("{conf:#?}"), @r###" - [ - IndexEmbeddingConfig { - name: "manual", - config: EmbeddingConfig { - embedder_options: UserProvided( - EmbedderOptions { - dimensions: 3, - distribution: None, - }, - ), - prompt: PromptData { - template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", - max_bytes: Some( - 400, - ), + [ + IndexEmbeddingConfig { + name: "manual", + config: EmbeddingConfig { + embedder_options: UserProvided( + EmbedderOptions { + dimensions: 3, + distribution: None, }, - quantized: None, + ), + prompt: PromptData { + template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + max_bytes: Some( + 400, + ), }, - user_provided: RoaringBitmap<[0]>, + quantized: None, }, - ] - "###); + fragments: FragmentConfigs( + [], + ), + }, + ] + "###); let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); let embeddings = index.embeddings(&rtxn, docid).unwrap(); let embedding = 
&embeddings["manual"]; + let (embedding, _) = &embeddings["manual"]; assert!(!embedding.is_empty(), "{embedding:?}"); index_scheduler @@ -647,30 +714,32 @@ fn delete_document_containing_vector() { .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) .collect::>(); snapshot!(serde_json::to_string(&documents).unwrap(), @"[]"); - let conf = index.embedding_configs(&rtxn).unwrap(); + let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap(); snapshot!(format!("{conf:#?}"), @r###" - [ - IndexEmbeddingConfig { - name: "manual", - config: EmbeddingConfig { - embedder_options: UserProvided( - EmbedderOptions { - dimensions: 3, - distribution: None, - }, - ), - prompt: PromptData { - template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", - max_bytes: Some( - 400, - ), + [ + IndexEmbeddingConfig { + name: "manual", + config: EmbeddingConfig { + embedder_options: UserProvided( + EmbedderOptions { + dimensions: 3, + distribution: None, }, - quantized: None, + ), + prompt: PromptData { + template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + max_bytes: Some( + 400, + ), }, - user_provided: RoaringBitmap<[]>, + quantized: None, }, - ] - "###); + fragments: FragmentConfigs( + [], + ), + }, + ] + "###); } #[test] @@ -800,7 +869,7 @@ fn delete_embedder_with_user_provided_vectors() { .unwrap() .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###); + snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"regenerate":false,"embeddings":[[0.0,0.0,0.0]]}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"regenerate":false,"embeddings":[[1.0,1.0,1.0]]}}}]"###); } { @@ -835,6 +904,6 @@ fn delete_embedder_with_user_provided_vectors() { .collect::>(); // FIXME: redaction - snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), 
@r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###); + snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), 
@r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[0.0,0.0,0.0]]},\"my_doggo_embedder\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]]}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0]]}}}]""###); } } diff --git a/crates/index-scheduler/src/scheduler/test_failure.rs b/crates/index-scheduler/src/scheduler/test_failure.rs index 191910d38..ad7f22bd8 100644 --- a/crates/index-scheduler/src/scheduler/test_failure.rs +++ b/crates/index-scheduler/src/scheduler/test_failure.rs @@ -2,10 +2,9 @@ use std::time::Instant; use big_s::S; use meili_snap::snapshot; -use meilisearch_types::milli::obkv_to_json; use meilisearch_types::milli::update::IndexDocumentsMethod::*; use meilisearch_types::milli::update::Setting; -use meilisearch_types::milli::FilterableAttributesRule; +use meilisearch_types::milli::{obkv_to_json, FilterableAttributesRule}; use meilisearch_types::tasks::{Kind, KindWithContent}; use crate::insta_snapshot::snapshot_index_scheduler; diff --git a/crates/index-scheduler/src/test_utils.rs b/crates/index-scheduler/src/test_utils.rs index 0d44b3c81..bfed7f53a 100644 --- a/crates/index-scheduler/src/test_utils.rs +++ b/crates/index-scheduler/src/test_utils.rs @@ -37,6 +37,7 @@ pub(crate) enum FailureLocation { InsideCreateBatch, InsideProcessBatch, PanicInsideProcessBatch, + ProcessExport, ProcessUpgrade, AcquiringWtxn, UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 }, @@ -113,11 +114,15 @@ impl IndexScheduler { instance_features: Default::default(), auto_upgrade: true, // Don't cost much and will ensure the happy path works embedding_cache_cap: 10, + experimental_no_snapshot_compaction: false, }; let version = configuration(&mut options).unwrap_or({ (versioning::VERSION_MAJOR, versioning::VERSION_MINOR, versioning::VERSION_PATCH) }); + // If the number of batched tasks is 0, the scheduler will not run and we can't do the init check. 
+ let skip_init = options.max_number_of_batched_tasks == 0; + std::fs::create_dir_all(&options.auth_path).unwrap(); let auth_env = open_auth_store_env(&options.auth_path).unwrap(); let index_scheduler = @@ -126,7 +131,11 @@ impl IndexScheduler { // To be 100% consistent between all tests we're going to start the scheduler right now // and ensure it's in the expected starting state. let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(10)) { + Ok(b) if skip_init => { + panic!("The scheduler was not supposed to start, but it did: {b:?}.") + } Ok(b) => b, + Err(_) if skip_init => (Init, false), Err(RecvTimeoutError::Timeout) => { panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") } diff --git a/crates/index-scheduler/src/utils.rs b/crates/index-scheduler/src/utils.rs index 67e8fc090..3c921f099 100644 --- a/crates/index-scheduler/src/utils.rs +++ b/crates/index-scheduler/src/utils.rs @@ -1,7 +1,9 @@ //! Utility functions on the DBs. Mainly getters and setters. +use crate::milli::progress::EmbedderStats; use std::collections::{BTreeSet, HashSet}; use std::ops::Bound; +use std::sync::Arc; use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats}; use meilisearch_types::heed::{Database, RoTxn, RwTxn}; @@ -27,6 +29,7 @@ pub struct ProcessingBatch { pub uid: BatchId, pub details: DetailsView, pub stats: BatchStats, + pub embedder_stats: Arc<EmbedderStats>, pub statuses: HashSet<Status>, pub kinds: HashSet<Kind>, @@ -48,6 +51,7 @@ impl ProcessingBatch { uid, details: DetailsView::default(), stats: BatchStats::default(), + embedder_stats: Default::default(), statuses, kinds: HashSet::default(), @@ -146,6 +150,7 @@ impl ProcessingBatch { progress: None, details: self.details.clone(), stats: self.stats.clone(), + embedder_stats: self.embedder_stats.as_ref().into(), started_at: self.started_at, finished_at: self.finished_at, enqueued_at: self.enqueued_at, @@ -273,6 +278,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) { K::TaskCancelation { .. } | K::TaskDeletion { .. } | K::DumpCreation { .. } + | K::Export { .. } | K::UpgradeDatabase { .. 
} | K::SnapshotCreation => (), }; @@ -600,6 +606,9 @@ impl crate::IndexScheduler { Details::Dump { dump_uid: _ } => { assert_eq!(kind.as_kind(), Kind::DumpCreation); } + Details::Export { url: _, api_key: _, payload_size: _, indexes: _ } => { + assert_eq!(kind.as_kind(), Kind::Export); + } Details::UpgradeDatabase { from: _, to: _ } => { assert_eq!(kind.as_kind(), Kind::UpgradeDatabase); } diff --git a/crates/json-depth-checker/Cargo.toml b/crates/json-depth-checker/Cargo.toml index b8162357b..68964354a 100644 --- a/crates/json-depth-checker/Cargo.toml +++ b/crates/json-depth-checker/Cargo.toml @@ -15,7 +15,7 @@ license.workspace = true serde_json = "1.0" [dev-dependencies] -criterion = "0.5.1" +criterion = "0.6.0" [[bench]] name = "depth" diff --git a/crates/meili-snap/Cargo.toml b/crates/meili-snap/Cargo.toml index be96769ab..42b900e5a 100644 --- a/crates/meili-snap/Cargo.toml +++ b/crates/meili-snap/Cargo.toml @@ -14,6 +14,6 @@ license.workspace = true # fixed version due to format breakages in v1.40 insta = { version = "=1.39.0", features = ["json", "redactions"] } md5 = "0.7.0" -once_cell = "1.20" +once_cell = "1.21" regex-lite = "0.1.6" uuid = { version = "1.17.0", features = ["v4"] } diff --git a/crates/meili-snap/src/lib.rs b/crates/meili-snap/src/lib.rs index a59732f04..efe57f8df 100644 --- a/crates/meili-snap/src/lib.rs +++ b/crates/meili-snap/src/lib.rs @@ -43,23 +43,28 @@ pub fn default_snapshot_settings_for_test<'a>( } } - settings.add_dynamic_redaction(".message", uuid_in_message_redaction); - settings.add_dynamic_redaction(".error.message", uuid_in_message_redaction); - settings.add_dynamic_redaction(".indexUid", |content, _content_path| match &content { - Content::String(s) => match uuid::Uuid::parse_str(s) { - Ok(_) => Content::String("[uuid]".to_owned()), - Err(_) => content, - }, - _ => content, - }); - - settings.add_dynamic_redaction(".error.message", |content, _content_path| match &content { - Content::String(s) => { - let uuid_replaced = UUID_IN_MESSAGE_RE.replace_all(s, "[uuid]"); - Content::String(uuid_replaced.to_string()) + fn uuid_in_json_key_redaction(content: Content, _content_path: ContentPath) -> Content { + match content { + Content::Map(map) => { + let new_map = map + .iter() + .map(|(key, value)| match key { + Content::String(s) => { + let uuid_replaced = UUID_IN_MESSAGE_RE.replace_all(s, "[uuid]"); + (Content::String(uuid_replaced.to_string()), value.clone()) + } + _ => (key.clone(), value.clone()), + }) + .collect(); + Content::Map(new_map) + } + _ => content, } - _ => content, - }); + } + + settings.add_dynamic_redaction(".**.message", uuid_in_message_redaction); + settings.add_dynamic_redaction(".**.indexUid", uuid_in_message_redaction); + settings.add_dynamic_redaction(".**.facetsByIndex", uuid_in_json_key_redaction); let test_name = test_name.strip_suffix("::{{closure}}").unwrap_or(test_name); let test_name = test_name.rsplit("::").next().unwrap().to_owned(); diff --git a/crates/meilisearch-auth/Cargo.toml b/crates/meilisearch-auth/Cargo.toml index d31effd6e..30eb8125b 100644 --- a/crates/meilisearch-auth/Cargo.toml +++ b/crates/meilisearch-auth/Cargo.toml @@ -17,10 +17,10 @@ hmac = "0.12.1" maplit = "1.0.2" meilisearch-types = { path = "../meilisearch-types" } rand = "0.8.5" -roaring = { version = "0.10.10", features = ["serde"] } -serde = { version = "1.0.217", features = ["derive"] } -serde_json = { version = "1.0.135", features = ["preserve_order"] } -sha2 = "0.10.8" -thiserror = "2.0.9" -time = { version = "0.3.37", features = 
["serde-well-known", "formatting", "parsing", "macros"] } -uuid = { version = "1.11.0", features = ["serde", "v4"] } +roaring = { version = "0.10.12", features = ["serde"] } +serde = { version = "1.0.219", features = ["derive"] } +serde_json = { version = "1.0.140", features = ["preserve_order"] } +sha2 = "0.10.9" +thiserror = "2.0.12" +time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] } +uuid = { version = "1.17.0", features = ["serde", "v4"] } diff --git a/crates/meilisearch-auth/src/lib.rs b/crates/meilisearch-auth/src/lib.rs index 01c986d9f..27d163192 100644 --- a/crates/meilisearch-auth/src/lib.rs +++ b/crates/meilisearch-auth/src/lib.rs @@ -165,6 +165,7 @@ impl AuthController { } } +#[derive(Debug)] pub struct AuthFilter { search_rules: Option, key_authorized_indexes: SearchRules, @@ -349,6 +350,7 @@ pub struct IndexSearchRules { } fn generate_default_keys(store: &HeedAuthStore) -> Result<()> { + store.put_api_key(Key::default_chat())?; store.put_api_key(Key::default_admin())?; store.put_api_key(Key::default_search())?; diff --git a/crates/meilisearch-auth/src/store.rs b/crates/meilisearch-auth/src/store.rs index 2fd380194..bae27afe4 100644 --- a/crates/meilisearch-auth/src/store.rs +++ b/crates/meilisearch-auth/src/store.rs @@ -125,6 +125,12 @@ impl HeedAuthStore { Action::MetricsAll => { actions.insert(Action::MetricsGet); } + Action::ChatsAll => { + actions.extend([Action::ChatsGet, Action::ChatsDelete]); + } + Action::ChatsSettingsAll => { + actions.extend([Action::ChatsSettingsGet, Action::ChatsSettingsUpdate]); + } other => { actions.insert(*other); } diff --git a/crates/meilisearch-types/Cargo.toml b/crates/meilisearch-types/Cargo.toml index 55b54ecc7..faf59643f 100644 --- a/crates/meilisearch-types/Cargo.toml +++ b/crates/meilisearch-types/Cargo.toml @@ -11,37 +11,38 @@ edition.workspace = true license.workspace = true [dependencies] -actix-web = { version = "4.9.0", default-features = false } -anyhow = "1.0.95" -bumpalo = "3.16.0" +actix-web = { version = "4.11.0", default-features = false } +anyhow = "1.0.98" +bumpalo = "3.18.1" bumparaw-collections = "0.1.4" -convert_case = "0.6.0" +byte-unit = { version = "5.1.6", features = ["serde"] } +convert_case = "0.8.0" csv = "1.3.1" deserr = { version = "0.6.3", features = ["actix-web"] } -either = { version = "1.13.0", features = ["serde"] } +either = { version = "1.15.0", features = ["serde"] } enum-iterator = "2.1.0" file-store = { path = "../file-store" } -flate2 = "1.0.35" +flate2 = "1.1.2" fst = "0.4.7" memmap2 = "0.9.5" milli = { path = "../milli" } -roaring = { version = "0.10.10", features = ["serde"] } -rustc-hash = "2.1.0" -serde = { version = "1.0.217", features = ["derive"] } +roaring = { version = "0.10.12", features = ["serde"] } +rustc-hash = "2.1.1" +serde = { version = "1.0.219", features = ["derive"] } serde-cs = "0.2.4" -serde_json = { version = "1.0.135", features = ["preserve_order"] } -tar = "0.4.43" -tempfile = "3.15.0" -thiserror = "2.0.9" -time = { version = "0.3.37", features = [ +serde_json = { version = "1.0.140", features = ["preserve_order"] } +tar = "0.4.44" +tempfile = "3.20.0" +thiserror = "2.0.12" +time = { version = "0.3.41", features = [ "serde-well-known", "formatting", "parsing", "macros", ] } -tokio = "1.43" -utoipa = { version = "5.3.1", features = ["macros"] } -uuid = { version = "1.11.0", features = ["serde", "v4"] } +tokio = "1.45" +utoipa = { version = "5.4.0", features = ["macros"] } +uuid = { version = "1.17.0", features = ["serde", "v4"] 
} [dev-dependencies] # fixed version due to format breakages in v1.40 diff --git a/crates/meilisearch-types/src/batch_view.rs b/crates/meilisearch-types/src/batch_view.rs index 791e1d4ec..297b10ba1 100644 --- a/crates/meilisearch-types/src/batch_view.rs +++ b/crates/meilisearch-types/src/batch_view.rs @@ -3,7 +3,7 @@ use serde::Serialize; use time::{Duration, OffsetDateTime}; use utoipa::ToSchema; -use crate::batches::{Batch, BatchId, BatchStats}; +use crate::batches::{Batch, BatchId, BatchStats, EmbedderStatsView}; use crate::task_view::DetailsView; use crate::tasks::serialize_duration; @@ -14,7 +14,7 @@ pub struct BatchView { pub uid: BatchId, pub progress: Option<ProgressView>, pub details: DetailsView, - pub stats: BatchStats, + pub stats: BatchStatsView, #[serde(serialize_with = "serialize_duration", default)] pub duration: Option<Duration>, #[serde(with = "time::serde::rfc3339", default)] @@ -22,7 +22,17 @@ #[serde(with = "time::serde::rfc3339::option", default)] pub finished_at: Option<OffsetDateTime>, #[serde(default = "meilisearch_types::batches::default_stop_reason")] - pub batch_creation_complete: String, + pub batch_strategy: String, +} +#[derive(Debug, Clone, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] +pub struct BatchStatsView { + #[serde(flatten)] + pub stats: BatchStats, + #[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)] + pub embedder_requests: EmbedderStatsView, } impl BatchView { @@ -31,11 +41,14 @@ uid: batch.uid, progress: batch.progress.clone(), details: batch.details.clone(), - stats: batch.stats.clone(), + stats: BatchStatsView { + stats: batch.stats.clone(), + embedder_requests: batch.embedder_stats.clone(), + }, duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at), started_at: batch.started_at, finished_at: batch.finished_at, - batch_creation_complete: batch.stop_reason.clone(), + batch_strategy: batch.stop_reason.clone(), } } } diff --git a/crates/meilisearch-types/src/batches.rs b/crates/meilisearch-types/src/batches.rs index 4d40189db..e1cc2b7c7 100644 --- a/crates/meilisearch-types/src/batches.rs +++ b/crates/meilisearch-types/src/batches.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use milli::progress::ProgressView; +use milli::progress::{EmbedderStats, ProgressView}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use utoipa::ToSchema; @@ -19,6 +19,8 @@ pub struct Batch { pub progress: Option<ProgressView>, pub details: DetailsView, pub stats: BatchStats, + #[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)] + pub embedder_stats: EmbedderStatsView, #[serde(with = "time::serde::rfc3339")] pub started_at: OffsetDateTime, @@ -43,6 +45,7 @@ impl PartialEq for Batch { progress, details, stats, + embedder_stats, started_at, finished_at, enqueued_at, @@ -53,6 +56,7 @@ && progress.is_none() == other.progress.is_none() && details == &other.details && stats == &other.stats + && embedder_stats == &other.embedder_stats && started_at == &other.started_at && finished_at == &other.finished_at && enqueued_at == &other.enqueued_at @@ -83,3 +87,30 @@ pub struct BatchStats { #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")] pub internal_database_sizes: serde_json::Map<String, serde_json::Value>, } + +#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] +pub struct EmbedderStatsView { + pub total: usize, + pub failed: 
usize, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub last_error: Option<String>, +} + +impl From<&EmbedderStats> for EmbedderStatsView { + fn from(stats: &EmbedderStats) -> Self { + let errors = stats.errors.read().unwrap_or_else(|p| p.into_inner()); + Self { + total: stats.total_count.load(std::sync::atomic::Ordering::Relaxed), + failed: errors.1 as usize, + last_error: errors.0.clone(), + } + } +} + +impl EmbedderStatsView { + pub fn skip_serializing(&self) -> bool { + self.total == 0 && self.failed == 0 && self.last_error.is_none() + } +} diff --git a/crates/meilisearch-types/src/deserr/mod.rs b/crates/meilisearch-types/src/deserr/mod.rs index f5ad18d5c..f1470c201 100644 --- a/crates/meilisearch-types/src/deserr/mod.rs +++ b/crates/meilisearch-types/src/deserr/mod.rs @@ -4,9 +4,12 @@ use std::marker::PhantomData; use std::ops::ControlFlow; use deserr::errors::{JsonError, QueryParamError}; -use deserr::{take_cf_content, DeserializeError, IntoValue, MergeWithError, ValuePointerRef}; +use deserr::{ + take_cf_content, DeserializeError, Deserr, IntoValue, MergeWithError, ValuePointerRef, +}; +use milli::update::ChatSettings; -use crate::error::deserr_codes::*; +use crate::error::deserr_codes::{self, *}; use crate::error::{ Code, DeserrParseBoolError, DeserrParseIntError, ErrorCode, InvalidTaskDateError, ParseOffsetDateTimeError, @@ -33,6 +36,7 @@ pub struct DeserrError<Format, C: Default + ErrorCode> { pub msg: String, pub code: Code, _phantom: PhantomData<(Format, C)>, } + impl<Format, C: Default + ErrorCode> DeserrError<Format, C> { pub fn new(msg: String, code: Code) -> Self { Self { msg, code, _phantom: PhantomData } } @@ -117,6 +121,16 @@ impl<C: Default + ErrorCode> DeserializeError for DeserrQueryParamError<C> { } } +impl Deserr<DeserrError<DeserrJson, deserr_codes::InvalidSettingsIndexChat>> for ChatSettings { + fn deserialize_from_value<V: IntoValue>( + value: deserr::Value<V>, + location: ValuePointerRef, + ) -> Result<Self, DeserrError<DeserrJson, deserr_codes::InvalidSettingsIndexChat>> { + Deserr::<JsonError>::deserialize_from_value(value, location) + .map_err(|e| DeserrError::new(e.to_string(), InvalidSettingsIndexChat.error_code())) + } +} + pub fn immutable_field_error(field: &str, accepted: &[&str], code: Code) -> DeserrJsonError { let msg = format!( "Immutable field `{field}`: expected one of {}", diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index 6c547d51e..c57e2d042 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -194,200 +194,229 @@ macro_rules! make_error_codes { // An exhaustive list of all the error codes used by meilisearch. make_error_codes! 
{ -ApiKeyAlreadyExists , InvalidRequest , CONFLICT ; -ApiKeyNotFound , InvalidRequest , NOT_FOUND ; -BadParameter , InvalidRequest , BAD_REQUEST; -BadRequest , InvalidRequest , BAD_REQUEST; -DatabaseSizeLimitReached , Internal , INTERNAL_SERVER_ERROR; -DocumentNotFound , InvalidRequest , NOT_FOUND; -DumpAlreadyProcessing , InvalidRequest , CONFLICT; -DumpNotFound , InvalidRequest , NOT_FOUND; -DumpProcessFailed , Internal , INTERNAL_SERVER_ERROR; -DuplicateIndexFound , InvalidRequest , BAD_REQUEST; -ImmutableApiKeyActions , InvalidRequest , BAD_REQUEST; -ImmutableApiKeyCreatedAt , InvalidRequest , BAD_REQUEST; -ImmutableApiKeyExpiresAt , InvalidRequest , BAD_REQUEST; -ImmutableApiKeyIndexes , InvalidRequest , BAD_REQUEST; -ImmutableApiKeyKey , InvalidRequest , BAD_REQUEST; -ImmutableApiKeyUid , InvalidRequest , BAD_REQUEST; -ImmutableApiKeyUpdatedAt , InvalidRequest , BAD_REQUEST; -ImmutableIndexCreatedAt , InvalidRequest , BAD_REQUEST; -ImmutableIndexUid , InvalidRequest , BAD_REQUEST; -ImmutableIndexUpdatedAt , InvalidRequest , BAD_REQUEST; -IndexAlreadyExists , InvalidRequest , CONFLICT ; -IndexCreationFailed , Internal , INTERNAL_SERVER_ERROR; -IndexNotFound , InvalidRequest , NOT_FOUND; -IndexPrimaryKeyAlreadyExists , InvalidRequest , BAD_REQUEST ; -IndexPrimaryKeyMultipleCandidatesFound, InvalidRequest , BAD_REQUEST; -IndexPrimaryKeyNoCandidateFound , InvalidRequest , BAD_REQUEST ; -Internal , Internal , INTERNAL_SERVER_ERROR ; -InvalidApiKey , Auth , FORBIDDEN ; -InvalidApiKeyActions , InvalidRequest , BAD_REQUEST ; -InvalidApiKeyDescription , InvalidRequest , BAD_REQUEST ; -InvalidApiKeyExpiresAt , InvalidRequest , BAD_REQUEST ; -InvalidApiKeyIndexes , InvalidRequest , BAD_REQUEST ; -InvalidApiKeyLimit , InvalidRequest , BAD_REQUEST ; -InvalidApiKeyName , InvalidRequest , BAD_REQUEST ; -InvalidApiKeyOffset , InvalidRequest , BAD_REQUEST ; -InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ; -InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; -InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ; -InvalidDocumentFields , InvalidRequest , BAD_REQUEST ; -InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ; -MissingDocumentFilter , InvalidRequest , BAD_REQUEST ; -MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ; -InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ; -InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; -InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ; -InvalidVectorsType , InvalidRequest , BAD_REQUEST ; -InvalidDocumentId , InvalidRequest , BAD_REQUEST ; -InvalidDocumentIds , InvalidRequest , BAD_REQUEST ; -InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; -InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; -InvalidSearchEmbedder , InvalidRequest , BAD_REQUEST ; -InvalidSimilarEmbedder , InvalidRequest , BAD_REQUEST ; -InvalidSearchHybridQuery , InvalidRequest , BAD_REQUEST ; -InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; -InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; -InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; -InvalidIndexUid , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchMergeFacets , 
InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; -InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ; -InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ; -InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ; -InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ; -InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ; -InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; -InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ; -InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; -InvalidSimilarRetrieveVectors , InvalidRequest , BAD_REQUEST ; -InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; -InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ; -InvalidSimilarRankingScoreThreshold , InvalidRequest , BAD_REQUEST ; -InvalidSearchRetrieveVectors , InvalidRequest , BAD_REQUEST ; -InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ; -InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ; -InvalidSearchFacets , InvalidRequest , BAD_REQUEST ; -InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ; -InvalidSearchLocales , InvalidRequest , BAD_REQUEST ; -InvalidFacetSearchExhaustiveFacetCount, InvalidRequest , BAD_REQUEST ; -InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; -InvalidSimilarId , InvalidRequest , BAD_REQUEST ; -InvalidSearchFilter , InvalidRequest , BAD_REQUEST ; -InvalidSimilarFilter , InvalidRequest , BAD_REQUEST ; -InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ; -InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ; -InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ; -InvalidSimilarLimit , InvalidRequest , BAD_REQUEST ; -InvalidSearchLimit , InvalidRequest , BAD_REQUEST ; -InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ; -InvalidSimilarOffset , InvalidRequest , BAD_REQUEST ; -InvalidSearchOffset , InvalidRequest , BAD_REQUEST ; -InvalidSearchPage , InvalidRequest , BAD_REQUEST ; -InvalidSearchQ , InvalidRequest , BAD_REQUEST ; -InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ; -InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ; -FacetSearchDisabled , InvalidRequest , BAD_REQUEST ; -InvalidSearchVector , InvalidRequest , BAD_REQUEST ; -InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ; -InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ; -InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ; -InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; -InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; -InvalidSearchSort , InvalidRequest , BAD_REQUEST ; -InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ; -InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; -InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; -InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; -InvalidSettingsFacetSearch , InvalidRequest , BAD_REQUEST ; -InvalidSettingsPrefixSearch , InvalidRequest , BAD_REQUEST ; -InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; -InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ; -InvalidSettingsPagination , 
InvalidRequest , BAD_REQUEST ; -InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ; -InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ; -InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ; -InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ; -InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ; -InvalidSettingsStopWords , InvalidRequest , BAD_REQUEST ; -InvalidSettingsNonSeparatorTokens , InvalidRequest , BAD_REQUEST ; -InvalidSettingsSeparatorTokens , InvalidRequest , BAD_REQUEST ; -InvalidSettingsDictionary , InvalidRequest , BAD_REQUEST ; -InvalidSettingsSynonyms , InvalidRequest , BAD_REQUEST ; -InvalidSettingsTypoTolerance , InvalidRequest , BAD_REQUEST ; -InvalidSettingsLocalizedAttributes , InvalidRequest , BAD_REQUEST ; -InvalidState , Internal , INTERNAL_SERVER_ERROR ; -InvalidStoreFile , Internal , INTERNAL_SERVER_ERROR ; -InvalidSwapDuplicateIndexFound , InvalidRequest , BAD_REQUEST ; -InvalidSwapIndexes , InvalidRequest , BAD_REQUEST ; -InvalidTaskAfterEnqueuedAt , InvalidRequest , BAD_REQUEST ; -InvalidTaskAfterFinishedAt , InvalidRequest , BAD_REQUEST ; -InvalidTaskAfterStartedAt , InvalidRequest , BAD_REQUEST ; -InvalidTaskBeforeEnqueuedAt , InvalidRequest , BAD_REQUEST ; -InvalidTaskBeforeFinishedAt , InvalidRequest , BAD_REQUEST ; -InvalidTaskBeforeStartedAt , InvalidRequest , BAD_REQUEST ; -InvalidTaskCanceledBy , InvalidRequest , BAD_REQUEST ; -InvalidTaskFrom , InvalidRequest , BAD_REQUEST ; -InvalidTaskLimit , InvalidRequest , BAD_REQUEST ; -InvalidTaskReverse , InvalidRequest , BAD_REQUEST ; -InvalidTaskStatuses , InvalidRequest , BAD_REQUEST ; -InvalidTaskTypes , InvalidRequest , BAD_REQUEST ; -InvalidTaskUids , InvalidRequest , BAD_REQUEST ; -InvalidBatchUids , InvalidRequest , BAD_REQUEST ; -IoError , System , UNPROCESSABLE_ENTITY; -FeatureNotEnabled , InvalidRequest , BAD_REQUEST ; -MalformedPayload , InvalidRequest , BAD_REQUEST ; -MaxFieldsLimitExceeded , InvalidRequest , BAD_REQUEST ; -MissingApiKeyActions , InvalidRequest , BAD_REQUEST ; -MissingApiKeyExpiresAt , InvalidRequest , BAD_REQUEST ; -MissingApiKeyIndexes , InvalidRequest , BAD_REQUEST ; -MissingAuthorizationHeader , Auth , UNAUTHORIZED ; -MissingContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; -MissingDocumentId , InvalidRequest , BAD_REQUEST ; -MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; -MissingIndexUid , InvalidRequest , BAD_REQUEST ; -MissingMasterKey , Auth , UNAUTHORIZED ; -MissingNetworkUrl , InvalidRequest , BAD_REQUEST ; -MissingPayload , InvalidRequest , BAD_REQUEST ; -MissingSearchHybrid , InvalidRequest , BAD_REQUEST ; -MissingSwapIndexes , InvalidRequest , BAD_REQUEST ; -MissingTaskFilters , InvalidRequest , BAD_REQUEST ; -NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY; -PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ; -RemoteBadResponse , System , BAD_GATEWAY ; -RemoteBadRequest , InvalidRequest , BAD_REQUEST ; -RemoteCouldNotSendRequest , System , BAD_GATEWAY ; -RemoteInvalidApiKey , Auth , FORBIDDEN ; -RemoteRemoteError , System , BAD_GATEWAY ; -RemoteTimeout , System , BAD_GATEWAY ; -TooManySearchRequests , System , SERVICE_UNAVAILABLE ; -TaskNotFound , InvalidRequest , NOT_FOUND ; -TaskFileNotFound , InvalidRequest , NOT_FOUND ; -BatchNotFound , InvalidRequest , NOT_FOUND ; -TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ; -TooManyVectors , InvalidRequest , BAD_REQUEST ; -UnretrievableDocument , Internal , BAD_REQUEST ; -UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ; 
-UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; +ApiKeyAlreadyExists , InvalidRequest , CONFLICT ; +ApiKeyNotFound , InvalidRequest , NOT_FOUND ; +BadParameter , InvalidRequest , BAD_REQUEST; +BadRequest , InvalidRequest , BAD_REQUEST; +DatabaseSizeLimitReached , Internal , INTERNAL_SERVER_ERROR; +DocumentNotFound , InvalidRequest , NOT_FOUND; +DumpAlreadyProcessing , InvalidRequest , CONFLICT; +DumpNotFound , InvalidRequest , NOT_FOUND; +DumpProcessFailed , Internal , INTERNAL_SERVER_ERROR; +DuplicateIndexFound , InvalidRequest , BAD_REQUEST; +ImmutableApiKeyActions , InvalidRequest , BAD_REQUEST; +ImmutableApiKeyCreatedAt , InvalidRequest , BAD_REQUEST; +ImmutableApiKeyExpiresAt , InvalidRequest , BAD_REQUEST; +ImmutableApiKeyIndexes , InvalidRequest , BAD_REQUEST; +ImmutableApiKeyKey , InvalidRequest , BAD_REQUEST; +ImmutableApiKeyUid , InvalidRequest , BAD_REQUEST; +ImmutableApiKeyUpdatedAt , InvalidRequest , BAD_REQUEST; +ImmutableIndexCreatedAt , InvalidRequest , BAD_REQUEST; +ImmutableIndexUid , InvalidRequest , BAD_REQUEST; +ImmutableIndexUpdatedAt , InvalidRequest , BAD_REQUEST; +IndexAlreadyExists , InvalidRequest , CONFLICT ; +IndexCreationFailed , Internal , INTERNAL_SERVER_ERROR; +IndexNotFound , InvalidRequest , NOT_FOUND; +IndexPrimaryKeyAlreadyExists , InvalidRequest , BAD_REQUEST ; +IndexPrimaryKeyMultipleCandidatesFound , InvalidRequest , BAD_REQUEST; +IndexPrimaryKeyNoCandidateFound , InvalidRequest , BAD_REQUEST ; +Internal , Internal , INTERNAL_SERVER_ERROR ; +InvalidApiKey , Auth , FORBIDDEN ; +InvalidApiKeyActions , InvalidRequest , BAD_REQUEST ; +InvalidApiKeyDescription , InvalidRequest , BAD_REQUEST ; +InvalidApiKeyExpiresAt , InvalidRequest , BAD_REQUEST ; +InvalidApiKeyIndexes , InvalidRequest , BAD_REQUEST ; +InvalidApiKeyLimit , InvalidRequest , BAD_REQUEST ; +InvalidApiKeyName , InvalidRequest , BAD_REQUEST ; +InvalidApiKeyOffset , InvalidRequest , BAD_REQUEST ; +InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ; +InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; +InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ; +InvalidDocumentFields , InvalidRequest , BAD_REQUEST ; +InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ; +MissingDocumentFilter , InvalidRequest , BAD_REQUEST ; +MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ; +InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ; +InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; +InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ; +InvalidVectorsType , InvalidRequest , BAD_REQUEST ; +InvalidDocumentId , InvalidRequest , BAD_REQUEST ; +InvalidDocumentIds , InvalidRequest , BAD_REQUEST ; +InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; +InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; +InvalidSearchEmbedder , InvalidRequest , BAD_REQUEST ; +InvalidSimilarEmbedder , InvalidRequest , BAD_REQUEST ; +InvalidSearchHybridQuery , InvalidRequest , BAD_REQUEST ; +InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; +InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; +InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; +InvalidIndexUid , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchMaxValuesPerFacet , 
InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; +InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ; +InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ; +InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ; +InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ; +InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ; +InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; +InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ; +InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; +InvalidSimilarRetrieveVectors , InvalidRequest , BAD_REQUEST ; +InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; +InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ; +InvalidSimilarRankingScoreThreshold , InvalidRequest , BAD_REQUEST ; +InvalidSearchRetrieveVectors , InvalidRequest , BAD_REQUEST ; +InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ; +InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ; +InvalidSearchFacets , InvalidRequest , BAD_REQUEST ; +InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ; +InvalidSearchLocales , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchExhaustiveFacetCount , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; +InvalidSimilarId , InvalidRequest , BAD_REQUEST ; +InvalidSearchFilter , InvalidRequest , BAD_REQUEST ; +InvalidSimilarFilter , InvalidRequest , BAD_REQUEST ; +InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ; +InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ; +InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ; +InvalidSimilarLimit , InvalidRequest , BAD_REQUEST ; +InvalidSearchLimit , InvalidRequest , BAD_REQUEST ; +InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ; +InvalidSimilarOffset , InvalidRequest , BAD_REQUEST ; +InvalidSearchOffset , InvalidRequest , BAD_REQUEST ; +InvalidSearchPage , InvalidRequest , BAD_REQUEST ; +InvalidSearchQ , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ; +FacetSearchDisabled , InvalidRequest , BAD_REQUEST ; +InvalidSearchVector , InvalidRequest , BAD_REQUEST ; +InvalidSearchMedia , InvalidRequest , BAD_REQUEST ; +InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ; +InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ; +InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ; +InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; +InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; +InvalidSearchSort , InvalidRequest , BAD_REQUEST ; +InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ; +InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ; +InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; +InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; +InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; +InvalidSettingsFacetSearch , InvalidRequest , BAD_REQUEST ; +InvalidSettingsPrefixSearch , InvalidRequest , 
BAD_REQUEST ; +InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; +InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ; +InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ; +InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ; +InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ; +InvalidSettingsStopWords , InvalidRequest , BAD_REQUEST ; +InvalidSettingsNonSeparatorTokens , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSeparatorTokens , InvalidRequest , BAD_REQUEST ; +InvalidSettingsDictionary , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSynonyms , InvalidRequest , BAD_REQUEST ; +InvalidSettingsTypoTolerance , InvalidRequest , BAD_REQUEST ; +InvalidSettingsLocalizedAttributes , InvalidRequest , BAD_REQUEST ; +InvalidState , Internal , INTERNAL_SERVER_ERROR ; +InvalidStoreFile , Internal , INTERNAL_SERVER_ERROR ; +InvalidSwapDuplicateIndexFound , InvalidRequest , BAD_REQUEST ; +InvalidSwapIndexes , InvalidRequest , BAD_REQUEST ; +InvalidTaskAfterEnqueuedAt , InvalidRequest , BAD_REQUEST ; +InvalidTaskAfterFinishedAt , InvalidRequest , BAD_REQUEST ; +InvalidTaskAfterStartedAt , InvalidRequest , BAD_REQUEST ; +InvalidTaskBeforeEnqueuedAt , InvalidRequest , BAD_REQUEST ; +InvalidTaskBeforeFinishedAt , InvalidRequest , BAD_REQUEST ; +InvalidTaskBeforeStartedAt , InvalidRequest , BAD_REQUEST ; +InvalidTaskCanceledBy , InvalidRequest , BAD_REQUEST ; +InvalidTaskFrom , InvalidRequest , BAD_REQUEST ; +InvalidTaskLimit , InvalidRequest , BAD_REQUEST ; +InvalidTaskReverse , InvalidRequest , BAD_REQUEST ; +InvalidTaskStatuses , InvalidRequest , BAD_REQUEST ; +InvalidTaskTypes , InvalidRequest , BAD_REQUEST ; +InvalidTaskUids , InvalidRequest , BAD_REQUEST ; +InvalidBatchUids , InvalidRequest , BAD_REQUEST ; +IoError , System , UNPROCESSABLE_ENTITY; +FeatureNotEnabled , InvalidRequest , BAD_REQUEST ; +MalformedPayload , InvalidRequest , BAD_REQUEST ; +MaxFieldsLimitExceeded , InvalidRequest , BAD_REQUEST ; +MissingApiKeyActions , InvalidRequest , BAD_REQUEST ; +MissingApiKeyExpiresAt , InvalidRequest , BAD_REQUEST ; +MissingApiKeyIndexes , InvalidRequest , BAD_REQUEST ; +MissingAuthorizationHeader , Auth , UNAUTHORIZED ; +MissingContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; +MissingDocumentId , InvalidRequest , BAD_REQUEST ; +MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; +MissingIndexUid , InvalidRequest , BAD_REQUEST ; +MissingMasterKey , Auth , UNAUTHORIZED ; +MissingNetworkUrl , InvalidRequest , BAD_REQUEST ; +MissingPayload , InvalidRequest , BAD_REQUEST ; +MissingSearchHybrid , InvalidRequest , BAD_REQUEST ; +MissingSwapIndexes , InvalidRequest , BAD_REQUEST ; +MissingTaskFilters , InvalidRequest , BAD_REQUEST ; +NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY; +PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ; +RemoteBadResponse , System , BAD_GATEWAY ; +RemoteBadRequest , InvalidRequest , BAD_REQUEST ; +RemoteCouldNotSendRequest , System , BAD_GATEWAY ; +RemoteInvalidApiKey , Auth , FORBIDDEN ; +RemoteRemoteError , System , BAD_GATEWAY ; +RemoteTimeout , System , BAD_GATEWAY ; +TooManySearchRequests , System , SERVICE_UNAVAILABLE ; +TaskNotFound , InvalidRequest , NOT_FOUND ; +TaskFileNotFound , InvalidRequest , NOT_FOUND ; +BatchNotFound , InvalidRequest , NOT_FOUND ; +TooManyOpenFiles , System , UNPROCESSABLE_ENTITY 
; +TooManyVectors , InvalidRequest , BAD_REQUEST ; +UnretrievableDocument , Internal , BAD_REQUEST ; +UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ; +UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; // Experimental features -VectorEmbeddingError , InvalidRequest , BAD_REQUEST ; -NotFoundSimilarId , InvalidRequest , BAD_REQUEST ; -InvalidDocumentEditionContext , InvalidRequest , BAD_REQUEST ; -InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ; -EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST +VectorEmbeddingError , InvalidRequest , BAD_REQUEST ; +NotFoundSimilarId , InvalidRequest , BAD_REQUEST ; +InvalidDocumentEditionContext , InvalidRequest , BAD_REQUEST ; +InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ; +EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST ; +InvalidSettingsIndexChat , InvalidRequest , BAD_REQUEST ; +// Export +InvalidExportUrl , InvalidRequest , BAD_REQUEST ; +InvalidExportApiKey , InvalidRequest , BAD_REQUEST ; +InvalidExportPayloadSize , InvalidRequest , BAD_REQUEST ; +InvalidExportIndexesPatterns , InvalidRequest , BAD_REQUEST ; +InvalidExportIndexFilter , InvalidRequest , BAD_REQUEST ; +InvalidExportIndexOverrideSettings , InvalidRequest , BAD_REQUEST ; +// Experimental features - Chat Completions +UnimplementedExternalFunctionCalling , InvalidRequest , NOT_IMPLEMENTED ; +UnimplementedNonStreamingChatCompletions , InvalidRequest , NOT_IMPLEMENTED ; +UnimplementedMultiChoiceChatCompletions , InvalidRequest , NOT_IMPLEMENTED ; +ChatNotFound , InvalidRequest , NOT_FOUND ; +InvalidChatSettingDocumentTemplate , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionOrgId , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionProjectId , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionApiVersion , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionDeploymentId , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionSource , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionBaseApi , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionApiKey , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionPrompts , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionSystemPrompt , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionSearchDescriptionPrompt , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST } impl ErrorCode for JoinError { @@ -430,12 +459,14 @@ impl ErrorCode for milli::Error { | UserError::InvalidSettingsDimensions { .. } | UserError::InvalidUrl { .. } | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. } + | UserError::InvalidChatSettingsDocumentTemplateMaxBytes | UserError::InvalidPrompt(_) | UserError::InvalidDisableBinaryQuantization { .. } | UserError::InvalidSourceForNested { .. } | UserError::MissingSourceForNested { .. } | UserError::InvalidSettingsEmbedder { .. } => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, + UserError::TooManyFragments(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound, UserError::MultiplePrimaryKeyCandidatesFound { .. 
} => { diff --git a/crates/meilisearch-types/src/features.rs b/crates/meilisearch-types/src/features.rs index 5db8775b6..3c78035e8 100644 --- a/crates/meilisearch-types/src/features.rs +++ b/crates/meilisearch-types/src/features.rs @@ -2,6 +2,14 @@ use std::collections::BTreeMap; use serde::{Deserialize, Serialize}; +use crate::error::{Code, ResponseError}; + +pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search."; +pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str = + "Search the database for relevant JSON documents using an optional query."; +pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results."; +pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query."; + #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] #[serde(rename_all = "camelCase", default)] pub struct RuntimeTogglableFeatures { @@ -12,6 +20,8 @@ pub struct RuntimeTogglableFeatures { pub network: bool, pub get_task_documents_route: bool, pub composite_embedders: bool, + pub chat_completions: bool, + pub multimodal: bool, } #[derive(Default, Debug, Clone, Copy)] @@ -37,3 +47,215 @@ pub struct Network { #[serde(default)] pub remotes: BTreeMap, } + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)] +#[serde(rename_all = "camelCase")] +pub struct ChatCompletionSettings { + pub source: ChatCompletionSource, + #[serde(default)] + pub org_id: Option, + #[serde(default)] + pub project_id: Option, + #[serde(default)] + pub api_version: Option, + #[serde(default)] + pub deployment_id: Option, + #[serde(default)] + pub base_url: Option, + #[serde(default)] + pub api_key: Option, + #[serde(default)] + pub prompts: ChatCompletionPrompts, +} + +impl ChatCompletionSettings { + pub fn hide_secrets(&mut self) { + if let Some(api_key) = &mut self.api_key { + Self::hide_secret(api_key); + } + } + + fn hide_secret(secret: &mut String) { + match secret.len() { + x if x < 10 => { + secret.replace_range(.., "XXX..."); + } + x if x < 20 => { + secret.replace_range(2.., "XXXX..."); + } + x if x < 30 => { + secret.replace_range(3.., "XXXXX..."); + } + _x => { + secret.replace_range(5.., "XXXXXX..."); + } + } + } + + pub fn validate(&self) -> Result<(), ResponseError> { + use ChatCompletionSource::*; + match self { + Self { source: AzureOpenAi, base_url, deployment_id, api_version, .. 
} if base_url.is_none() || deployment_id.is_none() || api_version.is_none() => Err(ResponseError::from_msg( + "azureOpenAi requires setting a valid `baseUrl`, `deploymentId`, and `apiVersion`".to_string(), + Code::BadRequest, + )), + Self { source: VLlm, base_url, .. } if base_url.is_none() => Err(ResponseError::from_msg( + "vLlm requires setting a valid `baseUrl`".to_string(), + Code::BadRequest, + )), + _otherwise => Ok(()), + } + } +} + +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "camelCase")] +pub enum ChatCompletionSource { + #[default] + OpenAi, + AzureOpenAi, + Mistral, + VLlm, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SystemRole { + System, + Developer, +} + +impl ChatCompletionSource { + pub fn system_role(&self, model: &str) -> SystemRole { + use ChatCompletionSource::*; + use SystemRole::*; + match self { + OpenAi if Self::old_openai_model(model) => System, + OpenAi => Developer, + AzureOpenAi if Self::old_openai_model(model) => System, + AzureOpenAi => Developer, + Mistral => System, + VLlm => System, + } + } + + /// Returns true if the model is an old OpenAI model. + /// + /// Old OpenAI models use the system role while new ones use the developer role. + fn old_openai_model(model: &str) -> bool { + ["gpt-3.5", "gpt-4", "gpt-4.1", "gpt-4.5", "gpt-4o", "chatgpt-4o"].iter().any(|old| { + model.starts_with(old) + && model.chars().nth(old.chars().count()).is_none_or(|last| last == '-') + }) + } + + pub fn base_url(&self) -> Option<&'static str> { + use ChatCompletionSource::*; + match self { + OpenAi => Some("https://api.openai.com/v1/"), + Mistral => Some("https://api.mistral.ai/v1/"), + AzureOpenAi | VLlm => None, + } + } +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct ChatCompletionPrompts { + pub system: String, + pub search_description: String, + pub search_q_param: String, + pub search_index_uid_param: String, +} + +impl Default for ChatCompletionPrompts { + fn default() -> Self { + Self { + system: DEFAULT_CHAT_SYSTEM_PROMPT.to_string(), + search_description: DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(), + search_q_param: DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(), + search_index_uid_param: DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const ALL_OPENAI_MODELS_OLDINESS: &[(&str, bool)] = &[ + ("gpt-4-0613", true), + ("gpt-4", true), + ("gpt-3.5-turbo", true), + ("gpt-4o-audio-preview-2025-06-03", true), + ("gpt-4.1-nano", true), + ("gpt-4o-realtime-preview-2025-06-03", true), + ("gpt-3.5-turbo-instruct", true), + ("gpt-3.5-turbo-instruct-0914", true), + ("gpt-4-1106-preview", true), + ("gpt-3.5-turbo-1106", true), + ("gpt-4-0125-preview", true), + ("gpt-4-turbo-preview", true), + ("gpt-3.5-turbo-0125", true), + ("gpt-4-turbo", true), + ("gpt-4-turbo-2024-04-09", true), + ("gpt-4o", true), + ("gpt-4o-2024-05-13", true), + ("gpt-4o-mini-2024-07-18", true), + ("gpt-4o-mini", true), + ("gpt-4o-2024-08-06", true), + ("chatgpt-4o-latest", true), + ("gpt-4o-realtime-preview-2024-10-01", true), + ("gpt-4o-audio-preview-2024-10-01", true), + ("gpt-4o-audio-preview", true), + ("gpt-4o-realtime-preview", true), + ("gpt-4o-realtime-preview-2024-12-17", true), + ("gpt-4o-audio-preview-2024-12-17", true), + ("gpt-4o-mini-realtime-preview-2024-12-17", true), + ("gpt-4o-mini-audio-preview-2024-12-17", true), + ("gpt-4o-mini-realtime-preview", true), + 
("gpt-4o-mini-audio-preview", true), + ("gpt-4o-2024-11-20", true), + ("gpt-4.5-preview", true), + ("gpt-4.5-preview-2025-02-27", true), + ("gpt-4o-search-preview-2025-03-11", true), + ("gpt-4o-search-preview", true), + ("gpt-4o-mini-search-preview-2025-03-11", true), + ("gpt-4o-mini-search-preview", true), + ("gpt-4o-transcribe", true), + ("gpt-4o-mini-transcribe", true), + ("gpt-4o-mini-tts", true), + ("gpt-4.1-2025-04-14", true), + ("gpt-4.1", true), + ("gpt-4.1-mini-2025-04-14", true), + ("gpt-4.1-mini", true), + ("gpt-4.1-nano-2025-04-14", true), + ("gpt-3.5-turbo-16k", true), + // + // new models + ("o1-preview-2024-09-12", false), + ("o1-preview", false), + ("o1-mini-2024-09-12", false), + ("o1-mini", false), + ("o1-2024-12-17", false), + ("o1", false), + ("o3-mini", false), + ("o3-mini-2025-01-31", false), + ("o1-pro-2025-03-19", false), + ("o1-pro", false), + ("o3-2025-04-16", false), + ("o4-mini-2025-04-16", false), + ("o3", false), + ("o4-mini", false), + ]; + + #[test] + fn old_openai_models() { + for (name, is_old) in ALL_OPENAI_MODELS_OLDINESS.iter().copied() { + assert_eq!( + ChatCompletionSource::old_openai_model(name), + is_old, + "Model {name} is not considered old" + ); + } + } +} diff --git a/crates/meilisearch-types/src/index_uid_pattern.rs b/crates/meilisearch-types/src/index_uid_pattern.rs index baf0249e2..f90fc7aee 100644 --- a/crates/meilisearch-types/src/index_uid_pattern.rs +++ b/crates/meilisearch-types/src/index_uid_pattern.rs @@ -12,7 +12,7 @@ use crate::index_uid::{IndexUid, IndexUidFormatError}; /// An index uid pattern is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// bytes long and optionally ending with a *. -#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[deserr(try_from(&String) = FromStr::from_str -> IndexUidPatternFormatError)] pub struct IndexUidPattern(String); diff --git a/crates/meilisearch-types/src/keys.rs b/crates/meilisearch-types/src/keys.rs index 27f2047ee..3ba31c2cb 100644 --- a/crates/meilisearch-types/src/keys.rs +++ b/crates/meilisearch-types/src/keys.rs @@ -53,7 +53,7 @@ pub struct CreateApiKey { #[schema(example = json!(["documents.add"]))] #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_api_key_actions)] pub actions: Vec, - /// A list of accesible indexes permitted for the key. `["*"]` for all indexes. The `*` character can be used as a wildcard when located at the last position. e.g. `products_*` to allow access to all indexes whose names start with `products_`. + /// A list of accessible indexes permitted for the key. `["*"]` for all indexes. The `*` character can be used as a wildcard when located at the last position. e.g. `products_*` to allow access to all indexes whose names start with `products_`. 
#[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_api_key_indexes)] #[schema(value_type = Vec, example = json!(["products"]))] pub indexes: Vec, @@ -158,6 +158,21 @@ impl Key { updated_at: now, } } + + pub fn default_chat() -> Self { + let now = OffsetDateTime::now_utc(); + let uid = Uuid::new_v4(); + Self { + name: Some("Default Chat API Key".to_string()), + description: Some("Use it to chat and search from the frontend".to_string()), + uid, + actions: vec![Action::ChatCompletions, Action::Search], + indexes: vec![IndexUidPattern::all()], + expires_at: None, + created_at: now, + updated_at: now, + } + } } fn parse_expiration_date( @@ -302,12 +317,36 @@ pub enum Action { #[serde(rename = "experimental.update")] #[deserr(rename = "experimental.update")] ExperimentalFeaturesUpdate, + #[serde(rename = "export")] + #[deserr(rename = "export")] + Export, #[serde(rename = "network.get")] #[deserr(rename = "network.get")] NetworkGet, #[serde(rename = "network.update")] #[deserr(rename = "network.update")] NetworkUpdate, + #[serde(rename = "chatCompletions")] + #[deserr(rename = "chatCompletions")] + ChatCompletions, + #[serde(rename = "chats.*")] + #[deserr(rename = "chats.*")] + ChatsAll, + #[serde(rename = "chats.get")] + #[deserr(rename = "chats.get")] + ChatsGet, + #[serde(rename = "chats.delete")] + #[deserr(rename = "chats.delete")] + ChatsDelete, + #[serde(rename = "chatsSettings.*")] + #[deserr(rename = "chatsSettings.*")] + ChatsSettingsAll, + #[serde(rename = "chatsSettings.get")] + #[deserr(rename = "chatsSettings.get")] + ChatsSettingsGet, + #[serde(rename = "chatsSettings.update")] + #[deserr(rename = "chatsSettings.update")] + ChatsSettingsUpdate, } impl Action { @@ -333,6 +372,13 @@ impl Action { SETTINGS_ALL => Some(Self::SettingsAll), SETTINGS_GET => Some(Self::SettingsGet), SETTINGS_UPDATE => Some(Self::SettingsUpdate), + CHAT_COMPLETIONS => Some(Self::ChatCompletions), + CHATS_ALL => Some(Self::ChatsAll), + CHATS_GET => Some(Self::ChatsGet), + CHATS_DELETE => Some(Self::ChatsDelete), + CHATS_SETTINGS_ALL => Some(Self::ChatsSettingsAll), + CHATS_SETTINGS_GET => Some(Self::ChatsSettingsGet), + CHATS_SETTINGS_UPDATE => Some(Self::ChatsSettingsUpdate), STATS_ALL => Some(Self::StatsAll), STATS_GET => Some(Self::StatsGet), METRICS_ALL => Some(Self::MetricsAll), @@ -395,6 +441,16 @@ pub mod actions { pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr(); pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr(); + pub const EXPORT: u8 = Export.repr(); + pub const NETWORK_GET: u8 = NetworkGet.repr(); pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr(); + + pub const CHAT_COMPLETIONS: u8 = ChatCompletions.repr(); + pub const CHATS_ALL: u8 = ChatsAll.repr(); + pub const CHATS_GET: u8 = ChatsGet.repr(); + pub const CHATS_DELETE: u8 = ChatsDelete.repr(); + pub const CHATS_SETTINGS_ALL: u8 = ChatsSettingsAll.repr(); + pub const CHATS_SETTINGS_GET: u8 = ChatsSettingsGet.repr(); + pub const CHATS_SETTINGS_UPDATE: u8 = ChatsSettingsUpdate.repr(); } diff --git a/crates/meilisearch-types/src/lib.rs b/crates/meilisearch-types/src/lib.rs index a1a57b7e6..fe69da526 100644 --- a/crates/meilisearch-types/src/lib.rs +++ b/crates/meilisearch-types/src/lib.rs @@ -18,7 +18,7 @@ pub mod versioning; pub use milli::{heed, Index}; use uuid::Uuid; pub use versioning::VERSION_FILE_NAME; -pub use {milli, serde_cs}; +pub use {byte_unit, milli, serde_cs}; pub type Document = serde_json::Map; pub type InstanceUid = Uuid; diff --git 
a/crates/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs index 97003074f..9e107a5c3 100644 --- a/crates/meilisearch-types/src/settings.rs +++ b/crates/meilisearch-types/src/settings.rs @@ -9,9 +9,11 @@ use std::str::FromStr; use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; use fst::IntoStreamer; use milli::disabled_typos_terms::DisabledTyposTerms; -use milli::index::{IndexEmbeddingConfig, PrefixSearch}; +use milli::index::PrefixSearch; use milli::proximity::ProximityPrecision; +pub use milli::update::ChatSettings; use milli::update::Setting; +use milli::vector::db::IndexEmbeddingConfig; use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET}; use serde::{Deserialize, Serialize, Serializer}; use utoipa::ToSchema; @@ -132,7 +134,7 @@ pub struct PaginationSettings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option, example = json!(250))] - pub max_total_hits: Setting, + pub max_total_hits: Setting, } impl MergeWithError for DeserrJsonError { @@ -185,7 +187,7 @@ impl Deserr for SettingEmbeddingSettings { /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a /// call to `check` will return a `Settings` from a `Settings`. -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Deserr, ToSchema)] #[serde( deny_unknown_fields, rename_all = "camelCase", @@ -199,72 +201,86 @@ pub struct Settings { #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["id", "title", "description", "url"]))] pub displayed_attributes: WildcardSetting, + /// Fields in which to search for matching query words sorted by order of importance. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["title", "description"]))] pub searchable_attributes: WildcardSetting, + /// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters). #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["release_date", "genre"]))] pub filterable_attributes: Setting>, + /// Attributes to use when sorting search results. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["release_date"]))] pub sortable_attributes: Setting>, + /// List of ranking rules sorted by order of importance. The order is customizable. /// [A list of ordered built-in ranking rules](https://www.meilisearch.com/docs/learn/relevancy/relevancy). #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!([RankingRuleView::Words, RankingRuleView::Typo, RankingRuleView::Proximity, RankingRuleView::Attribute, RankingRuleView::Exactness]))] pub ranking_rules: Setting>, + /// List of words ignored when present in search queries. 
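The `Checked`/`Unchecked` phantom-type design described in the `Settings<T>` doc comment earlier in this file is the classic typestate pattern: `check` consumes a `Settings<Unchecked>` and returns a `Settings<Checked>`, so validated and unvalidated settings are distinct types. A greatly reduced sketch of the idea (hypothetical field and error type, not the real struct):

```rust
use std::marker::PhantomData;

struct Checked;
struct Unchecked;

struct Settings<T> {
    max_total_hits: Option<usize>,
    _kind: PhantomData<T>,
}

impl Settings<Unchecked> {
    /// Validation is the only way to obtain a `Settings<Checked>`, so any
    /// function that takes one can rely on the invariants checked here.
    fn check(self) -> Result<Settings<Checked>, String> {
        if self.max_total_hits == Some(0) {
            return Err("`maxTotalHits` must be non-zero".to_string());
        }
        Ok(Settings { max_total_hits: self.max_total_hits, _kind: PhantomData })
    }
}
```

Since `PhantomData<T>` is zero-sized, the distinction exists only at compile time and costs nothing at runtime.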
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["the", "a", "them", "their"]))] pub stop_words: Setting>, + /// List of characters not delimiting where one term begins and ends. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!([" ", "\n"]))] pub non_separator_tokens: Setting>, + /// List of characters delimiting where one term begins and ends. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["S"]))] pub separator_tokens: Setting>, + /// List of strings Meilisearch should parse as a single term. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["iPhone pro"]))] pub dictionary: Setting>, + /// List of associated words treated similarly. A word associated to an array of word as synonyms. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>>, example = json!({ "he": ["she", "they", "them"], "phone": ["iPhone", "android"]}))] pub synonyms: Setting>>, + /// Search returns documents with distinct (different) values of the given field. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!("sku"))] pub distinct_attribute: Setting, + /// Precision level when calculating the proximity ranking rule. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!(ProximityPrecisionView::ByAttribute))] pub proximity_precision: Setting, + /// Customize typo tolerance feature. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!({ "enabled": true, "disableOnAttributes": ["title"]}))] pub typo_tolerance: Setting, + /// Faceting settings. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!({ "maxValuesPerFacet": 10, "sortFacetValuesBy": { "genre": FacetValuesSort::Count }}))] pub faceting: Setting, + /// Pagination settings. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] @@ -276,24 +292,34 @@ pub struct Settings { #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>)] pub embedders: Setting>, + /// Maximum duration of a search query. 
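The `schema(example = ...)` values on these fields add up to a typical partial settings payload. Collected into one JSON document, it looks roughly like this; a sketch for orientation only, with sample values lifted from the examples above and field names camelCased per the struct's `rename_all` attribute:

```rust
use serde_json::json;

fn main() {
    // Field names and sample values are taken from the schema examples above.
    let settings_update = json!({
        "distinctAttribute": "sku",
        "proximityPrecision": "byAttribute",
        "typoTolerance": { "enabled": true, "disableOnAttributes": ["title"] },
        "faceting": { "maxValuesPerFacet": 10 },
        "searchCutoffMs": 50
    });
    println!("{settings_update:#}");
}
```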
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!(50))] pub search_cutoff_ms: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(50))] pub localized_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!(true))] pub facet_search: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!("Hemlo"))] pub prefix_search: Setting, + /// Customize the chat prompting. + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option)] + pub chat: Setting, + #[serde(skip)] #[deserr(skip)] pub _kind: PhantomData, @@ -359,6 +385,7 @@ impl Settings { localized_attributes: Setting::Reset, facet_search: Setting::Reset, prefix_search: Setting::Reset, + chat: Setting::Reset, _kind: PhantomData, } } @@ -385,6 +412,7 @@ impl Settings { localized_attributes: localized_attributes_rules, facet_search, prefix_search, + chat, _kind, } = self; @@ -409,6 +437,7 @@ impl Settings { localized_attributes: localized_attributes_rules, facet_search, prefix_search, + chat, _kind: PhantomData, } } @@ -459,6 +488,7 @@ impl Settings { localized_attributes: self.localized_attributes, facet_search: self.facet_search, prefix_search: self.prefix_search, + chat: self.chat, _kind: PhantomData, } } @@ -471,8 +501,11 @@ impl Settings { let Setting::Set(mut configs) = self.embedders else { return Ok(self) }; for (name, config) in configs.iter_mut() { let config_to_check = std::mem::take(config); - let checked_config = - milli::update::validate_embedding_settings(config_to_check.inner, name)?; + let checked_config = milli::update::validate_embedding_settings( + config_to_check.inner, + name, + milli::vector::settings::EmbeddingValidationContext::SettingsPartialUpdate, + )?; *config = SettingEmbeddingSettings { inner: checked_config }; } self.embedders = Setting::Set(configs); @@ -533,8 +566,9 @@ impl Settings { Setting::Set(this) } }, - prefix_search: other.prefix_search.or(self.prefix_search), facet_search: other.facet_search.or(self.facet_search), + prefix_search: other.prefix_search.or(self.prefix_search), + chat: other.chat.clone().or(self.chat.clone()), _kind: PhantomData, } } @@ -573,6 +607,7 @@ pub fn apply_settings_to_builder( localized_attributes: localized_attributes_rules, facet_search, prefix_search, + chat, _kind, } = settings; @@ -720,6 +755,7 @@ pub fn apply_settings_to_builder( builder.reset_min_word_len_two_typos(); builder.reset_exact_words(); builder.reset_exact_attributes(); + builder.reset_disable_on_numbers(); } Setting::NotSet => (), } @@ -748,7 +784,7 @@ pub fn apply_settings_to_builder( match pagination { Setting::Set(ref value) => match value.max_total_hits { - Setting::Set(val) => builder.set_pagination_max_total_hits(val), + Setting::Set(val) => builder.set_pagination_max_total_hits(val.into()), Setting::Reset => builder.reset_pagination_max_total_hits(), Setting::NotSet => (), }, @@ -783,6 +819,12 @@ pub fn apply_settings_to_builder( Setting::Reset => builder.reset_facet_search(), Setting::NotSet => (), } + + match chat { + Setting::Set(chat) => builder.set_chat(chat.clone()), + Setting::Reset 
=> builder.reset_chat(), + Setting::NotSet => (), + } } pub enum SecretPolicy { @@ -867,12 +909,13 @@ pub fn settings( max_total_hits: Setting::Set( index .pagination_max_total_hits(rtxn)? - .map(|x| x as usize) - .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), + .and_then(|x| (x as usize).try_into().ok()) + .unwrap_or(NonZeroUsize::new(DEFAULT_PAGINATION_MAX_TOTAL_HITS).unwrap()), ), }; let embedders: BTreeMap<_, _> = index + .embedding_configs() .embedding_configs(rtxn)? .into_iter() .map(|IndexEmbeddingConfig { name, config, .. }| { @@ -880,14 +923,11 @@ pub fn settings( }) .collect(); let embedders = Setting::Set(embedders); - let search_cutoff_ms = index.search_cutoff(rtxn)?; - let localized_attributes_rules = index.localized_attributes_rules(rtxn)?; - let prefix_search = index.prefix_search(rtxn)?.map(PrefixSearchSettings::from); - let facet_search = index.facet_search(rtxn)?; + let chat = index.chat_config(rtxn).map(ChatSettings::from)?; let mut settings = Settings { displayed_attributes: match displayed_attributes { @@ -925,14 +965,16 @@ pub fn settings( Some(rules) => Setting::Set(rules.into_iter().map(|r| r.into()).collect()), None => Setting::Reset, }, - prefix_search: Setting::Set(prefix_search.unwrap_or_default()), facet_search: Setting::Set(facet_search), + prefix_search: Setting::Set(prefix_search.unwrap_or_default()), + chat: Setting::Set(chat), _kind: PhantomData, }; if let SecretPolicy::HideSecrets = secret_policy { settings.hide_secrets() } + Ok(settings) } @@ -1154,6 +1196,7 @@ pub(crate) mod test { search_cutoff_ms: Setting::NotSet, facet_search: Setting::NotSet, prefix_search: Setting::NotSet, + chat: Setting::NotSet, _kind: PhantomData::, }; @@ -1185,6 +1228,8 @@ pub(crate) mod test { search_cutoff_ms: Setting::NotSet, facet_search: Setting::NotSet, prefix_search: Setting::NotSet, + chat: Setting::NotSet, + _kind: PhantomData::, }; diff --git a/crates/meilisearch-types/src/task_view.rs b/crates/meilisearch-types/src/task_view.rs index 7a6faee39..7521137c0 100644 --- a/crates/meilisearch-types/src/task_view.rs +++ b/crates/meilisearch-types/src/task_view.rs @@ -1,3 +1,6 @@ +use std::collections::BTreeMap; + +use byte_unit::UnitType; use milli::Object; use serde::{Deserialize, Serialize}; use time::{Duration, OffsetDateTime}; @@ -6,9 +9,11 @@ use utoipa::ToSchema; use crate::batches::BatchId; use crate::error::ResponseError; use crate::settings::{Settings, Unchecked}; -use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId}; +use crate::tasks::{ + serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId, +}; -#[derive(Debug, Clone, PartialEq, Eq, Serialize, ToSchema)] +#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)] #[serde(rename_all = "camelCase")] #[schema(rename_all = "camelCase")] pub struct TaskView { @@ -67,7 +72,7 @@ impl TaskView { } } -#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize, Deserialize, ToSchema)] +#[derive(Default, Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] #[schema(rename_all = "camelCase")] pub struct DetailsView { @@ -118,6 +123,15 @@ pub struct DetailsView { pub upgrade_from: Option, #[serde(skip_serializing_if = "Option::is_none")] pub upgrade_to: Option, + // exporting + #[serde(skip_serializing_if = "Option::is_none")] + pub url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub api_key: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub payload_size: Option, + 
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub indexes: Option<BTreeMap<String, DetailsExportIndexSettings>>,
 }

 impl DetailsView {
@@ -238,6 +252,34 @@ impl DetailsView {
                    Some(left)
                }
            },
+            url: match (self.url.clone(), other.url.clone()) {
+                (None, None) => None,
+                (None, Some(url)) | (Some(url), None) => Some(url),
+                // We should never be able to batch multiple exports at the same time.
+                // So we return the first one we encounter but that shouldn't be an issue anyway.
+                (Some(left), Some(_right)) => Some(left),
+            },
+            api_key: match (self.api_key.clone(), other.api_key.clone()) {
+                (None, None) => None,
+                (None, Some(key)) | (Some(key), None) => Some(key),
+                // We should never be able to batch multiple exports at the same time.
+                // So we return the first one we encounter but that shouldn't be an issue anyway.
+                (Some(left), Some(_right)) => Some(left),
+            },
+            payload_size: match (self.payload_size.clone(), other.payload_size.clone()) {
+                (None, None) => None,
+                (None, Some(size)) | (Some(size), None) => Some(size),
+                // We should never be able to batch multiple exports at the same time.
+                // So we return the first one we encounter but that shouldn't be an issue anyway.
+                (Some(left), Some(_right)) => Some(left),
+            },
+            indexes: match (self.indexes.clone(), other.indexes.clone()) {
+                (None, None) => None,
+                (None, Some(indexes)) | (Some(indexes), None) => Some(indexes),
+                // We should never be able to batch multiple exports at the same time.
+                // So we return the first one we encounter but that shouldn't be an issue anyway.
+                (Some(left), Some(_right)) => Some(left),
+            },
            // We want the earliest version
            upgrade_from: match (self.upgrade_from.clone(), other.upgrade_from.clone()) {
                (None, None) => None,
@@ -327,6 +369,22 @@ impl From<Details>
for DetailsView {
            Details::IndexSwap { swaps } => {
                DetailsView { swaps: Some(swaps), ..Default::default() }
            }
+            Details::Export { url, api_key, payload_size, indexes } => DetailsView {
+                url: Some(url),
+                api_key: api_key.map(|mut api_key| {
+                    hide_secret(&mut api_key);
+                    api_key
+                }),
+                payload_size: payload_size
+                    .map(|ps| ps.get_appropriate_unit(UnitType::Both).to_string()),
+                indexes: Some(
+                    indexes
+                        .into_iter()
+                        .map(|(pattern, settings)| (pattern.to_string(), settings))
+                        .collect(),
+                ),
+                ..Default::default()
+            },
            Details::UpgradeDatabase { from, to } => DetailsView {
                upgrade_from: Some(format!("v{}.{}.{}", from.0, from.1, from.2)),
                upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
@@ -335,3 +393,21 @@ impl From<Details>
for DetailsView { } } } + +// We definitely need to factorize the code to hide the secret key +fn hide_secret(secret: &mut String) { + match secret.len() { + x if x < 10 => { + secret.replace_range(.., "XXX..."); + } + x if x < 20 => { + secret.replace_range(2.., "XXXX..."); + } + x if x < 30 => { + secret.replace_range(3.., "XXXXX..."); + } + _x => { + secret.replace_range(5.., "XXXXXX..."); + } + } +} diff --git a/crates/meilisearch-types/src/tasks.rs b/crates/meilisearch-types/src/tasks.rs index 6e10f2606..99b04f1e3 100644 --- a/crates/meilisearch-types/src/tasks.rs +++ b/crates/meilisearch-types/src/tasks.rs @@ -1,19 +1,22 @@ use core::fmt; -use std::collections::HashSet; +use std::collections::{BTreeMap, HashSet}; use std::fmt::{Display, Write}; use std::str::FromStr; +use byte_unit::Byte; use enum_iterator::Sequence; use milli::update::IndexDocumentsMethod; use milli::Object; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize, Serializer}; +use serde_json::Value; use time::{Duration, OffsetDateTime}; -use utoipa::ToSchema; +use utoipa::{schema, ToSchema}; use uuid::Uuid; use crate::batches::BatchId; use crate::error::ResponseError; +use crate::index_uid_pattern::IndexUidPattern; use crate::keys::Key; use crate::settings::{Settings, Unchecked}; use crate::{versioning, InstanceUid}; @@ -50,6 +53,7 @@ impl Task { | SnapshotCreation | TaskCancelation { .. } | TaskDeletion { .. } + | Export { .. } | UpgradeDatabase { .. } | IndexSwap { .. } => None, DocumentAdditionOrUpdate { index_uid, .. } @@ -86,6 +90,7 @@ impl Task { | KindWithContent::TaskDeletion { .. } | KindWithContent::DumpCreation { .. } | KindWithContent::SnapshotCreation + | KindWithContent::Export { .. } | KindWithContent::UpgradeDatabase { .. } => None, } } @@ -108,11 +113,11 @@ pub enum KindWithContent { }, DocumentDeletionByFilter { index_uid: String, - filter_expr: serde_json::Value, + filter_expr: Value, }, DocumentEdition { index_uid: String, - filter_expr: Option, + filter_expr: Option, context: Option, function: String, }, @@ -152,6 +157,12 @@ pub enum KindWithContent { instance_uid: Option, }, SnapshotCreation, + Export { + url: String, + api_key: Option, + payload_size: Option, + indexes: BTreeMap, + }, UpgradeDatabase { from: (u32, u32, u32), }, @@ -163,6 +174,13 @@ pub struct IndexSwap { pub indexes: (String, String), } +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ExportIndexSettings { + pub filter: Option, + pub override_settings: bool, +} + impl KindWithContent { pub fn as_kind(&self) -> Kind { match self { @@ -180,6 +198,7 @@ impl KindWithContent { KindWithContent::TaskDeletion { .. } => Kind::TaskDeletion, KindWithContent::DumpCreation { .. } => Kind::DumpCreation, KindWithContent::SnapshotCreation => Kind::SnapshotCreation, + KindWithContent::Export { .. } => Kind::Export, KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase, } } @@ -192,6 +211,7 @@ impl KindWithContent { | SnapshotCreation | TaskCancelation { .. } | TaskDeletion { .. } + | Export { .. } | UpgradeDatabase { .. } => vec![], DocumentAdditionOrUpdate { index_uid, .. } | DocumentEdition { index_uid, .. } @@ -269,6 +289,14 @@ impl KindWithContent { }), KindWithContent::DumpCreation { .. 
} => Some(Details::Dump { dump_uid: None }),
            KindWithContent::SnapshotCreation => None,
+            KindWithContent::Export { url, api_key, payload_size, indexes } => {
+                Some(Details::Export {
+                    url: url.clone(),
+                    api_key: api_key.clone(),
+                    payload_size: *payload_size,
+                    indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
+                })
+            }
            KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
                from: *from,
                to: (
@@ -383,6 +419,14 @@ impl From<&KindWithContent> for Option<Details>
{ }), KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }), KindWithContent::SnapshotCreation => None, + KindWithContent::Export { url, api_key, payload_size, indexes } => { + Some(Details::Export { + url: url.clone(), + api_key: api_key.clone(), + payload_size: *payload_size, + indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(), + }) + } KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase { from: *from, to: ( @@ -499,6 +543,7 @@ pub enum Kind { TaskDeletion, DumpCreation, SnapshotCreation, + Export, UpgradeDatabase, } @@ -516,6 +561,7 @@ impl Kind { | Kind::TaskCancelation | Kind::TaskDeletion | Kind::DumpCreation + | Kind::Export | Kind::UpgradeDatabase | Kind::SnapshotCreation => false, } @@ -536,6 +582,7 @@ impl Display for Kind { Kind::TaskDeletion => write!(f, "taskDeletion"), Kind::DumpCreation => write!(f, "dumpCreation"), Kind::SnapshotCreation => write!(f, "snapshotCreation"), + Kind::Export => write!(f, "export"), Kind::UpgradeDatabase => write!(f, "upgradeDatabase"), } } @@ -568,6 +615,8 @@ impl FromStr for Kind { Ok(Kind::DumpCreation) } else if kind.eq_ignore_ascii_case("snapshotCreation") { Ok(Kind::SnapshotCreation) + } else if kind.eq_ignore_ascii_case("export") { + Ok(Kind::Export) } else if kind.eq_ignore_ascii_case("upgradeDatabase") { Ok(Kind::UpgradeDatabase) } else { @@ -597,7 +646,7 @@ impl fmt::Display for ParseTaskKindError { } impl std::error::Error for ParseTaskKindError {} -#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] +#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] pub enum Details { DocumentAdditionOrUpdate { received_documents: u64, @@ -643,12 +692,33 @@ pub enum Details { IndexSwap { swaps: Vec, }, + Export { + url: String, + api_key: Option, + payload_size: Option, + indexes: BTreeMap, + }, UpgradeDatabase { from: (u32, u32, u32), to: (u32, u32, u32), }, } +#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] +#[schema(rename_all = "camelCase")] +pub struct DetailsExportIndexSettings { + #[serde(flatten)] + pub settings: ExportIndexSettings, + #[serde(skip_serializing_if = "Option::is_none")] + pub matched_documents: Option, +} + +impl From for DetailsExportIndexSettings { + fn from(settings: ExportIndexSettings) -> Self { + DetailsExportIndexSettings { settings, matched_documents: None } + } +} + impl Details { pub fn to_failed(&self) -> Self { let mut details = self.clone(); @@ -667,6 +737,7 @@ impl Details { Self::SettingsUpdate { .. } | Self::IndexInfo { .. } | Self::Dump { .. } + | Self::Export { .. } | Self::UpgradeDatabase { .. } | Self::IndexSwap { .. 
} => (), } @@ -746,70 +817,70 @@ impl Display for BatchStopReason { match self { BatchStopReason::Unspecified => f.write_str("unspecified"), BatchStopReason::TaskKindCannotBeBatched { kind } => { - write!(f, "a batch of tasks of type `{kind}` cannot be batched with any other type of task") + write!(f, "stopped after the last task of type `{kind}` because they cannot be batched with tasks of any other type.") } BatchStopReason::TaskCannotBeBatched { kind, id } => { - write!(f, "task with id {id} of type `{kind}` cannot be batched") + write!(f, "created batch containing only task with id {id} of type `{kind}` that cannot be batched with any other task.") } BatchStopReason::ExhaustedEnqueuedTasks => f.write_str("batched all enqueued tasks"), BatchStopReason::ExhaustedEnqueuedTasksForIndex { index } => { write!(f, "batched all enqueued tasks for index `{index}`") } BatchStopReason::ReachedTaskLimit { task_limit } => { - write!(f, "reached configured batch limit of {task_limit} tasks") + write!(f, "batched up to configured batch limit of {task_limit} tasks") } BatchStopReason::ReachedSizeLimit { size_limit, size } => write!( f, - "reached configured batch size limit of {size_limit}B with a total of {size}B" + "batched up to configured batch size limit of {size_limit}B with a total of {size}B", ), BatchStopReason::PrimaryKeyIndexMismatch { id, in_index, in_task } => { - write!(f, "primary key `{in_task}` in task with id {id} is different from the primary key of the index `{in_index}`") + write!(f, "stopped batching before task with id {id} because its primary key `{in_task}` is different from the primary key of the index `{in_index}`") } BatchStopReason::IndexCreationMismatch { id } => { - write!(f, "task with id {id} has different index creation rules as in the batch") + write!(f, "stopped batching before task with id {id} because its index creation rules differ from the ones from the batch") } BatchStopReason::PrimaryKeyMismatch { reason, id } => match reason { PrimaryKeyMismatchReason::TaskPrimaryKeyDifferFromIndexPrimaryKey { task_pk, index_pk, } => { - write!(f, "primary key `{task_pk}` in task with id {id} is different from the primary key of the index `{index_pk}`") + write!(f, "stopped batching before task with id {id} because its primary key `{task_pk}` is different from the primary key of the index `{index_pk}`") } PrimaryKeyMismatchReason::TaskPrimaryKeyDifferFromCurrentBatchPrimaryKey { task_pk, batch_pk, } => { - write!(f, "primary key `{task_pk}` in task with id {id} is different from the primary key of the batch `{batch_pk}`") + write!(f, "stopped batching before task with id {id} because its primary key `{task_pk}` is different from the primary key of the batch `{batch_pk}`") } PrimaryKeyMismatchReason::CannotInterfereWithPrimaryKeyGuessing { task_pk } => { - write!(f, "task with id {id} is setting the `{task_pk}` primary key but cannot interfere with primary key guessing of the batch") + write!(f, "stopped batching before task with id {id} because it is setting the `{task_pk}` primary key and it would interfere with primary key guessing of the batch") } }, BatchStopReason::IndexDeletion { id } => { - write!(f, "task with id {id} deletes the index") + write!(f, "stopped after task with id {id} because it deletes the index") } BatchStopReason::DocumentOperationWithSettings { id } => { write!( f, - "task with id {id} is a settings change in a batch of document operations" + "stopped before task with id {id} because it is a settings change which cannot be batched with document 
operations" ) } BatchStopReason::DocumentOperationWithDeletionByFilter { id } => { write!( f, - "task with id {id} is a deletion by filter in a batch of document operations" + "stopped before task with id {id} because it is a deletion by filter which cannot be batched with document operations" ) } BatchStopReason::DeletionByFilterWithDocumentOperation { id } => { write!( f, - "task with id {id} is a document operation in a batch of deletions by filter" + "stopped before task with id {id} because it is a document operation which cannot be batched with deletions by filter" ) } BatchStopReason::SettingsWithDocumentOperation { id } => { write!( f, - "task with id {id} is a document operation in a batch of settings changes" + "stopped before task with id {id} because it is a document operation which cannot be batched with settings changes" ) } } diff --git a/crates/meilisearch/Cargo.toml b/crates/meilisearch/Cargo.toml index dffa60326..83eb439d9 100644 --- a/crates/meilisearch/Cargo.toml +++ b/crates/meilisearch/Cargo.toml @@ -13,50 +13,50 @@ license.workspace = true default-run = "meilisearch" [dependencies] -actix-cors = "0.7.0" -actix-http = { version = "3.9.0", default-features = false, features = [ +actix-cors = "0.7.1" +actix-http = { version = "3.11.0", default-features = false, features = [ "compress-brotli", "compress-gzip", "rustls-0_23", ] } actix-utils = "3.0.1" -actix-web = { version = "4.9.0", default-features = false, features = [ +actix-web = { version = "4.11.0", default-features = false, features = [ "macros", "compress-brotli", "compress-gzip", "cookies", "rustls-0_23", ] } -anyhow = { version = "1.0.95", features = ["backtrace"] } -async-trait = "0.1.85" -bstr = "1.11.3" +anyhow = { version = "1.0.98", features = ["backtrace"] } +bstr = "1.12.0" byte-unit = { version = "5.1.6", features = ["serde"] } -bytes = "1.9.0" -clap = { version = "4.5.24", features = ["derive", "env"] } +bytes = "1.10.1" +bumpalo = "3.18.1" +clap = { version = "4.5.40", features = ["derive", "env"] } crossbeam-channel = "0.5.15" deserr = { version = "0.6.3", features = ["actix-web"] } dump = { path = "../dump" } -either = "1.13.0" +either = "1.15.0" file-store = { path = "../file-store" } -flate2 = "1.0.35" +flate2 = "1.1.2" fst = "0.4.7" futures = "0.3.31" futures-util = "0.3.31" index-scheduler = { path = "../index-scheduler" } -indexmap = { version = "2.7.0", features = ["serde"] } -is-terminal = "0.4.13" +indexmap = { version = "2.9.0", features = ["serde"] } +is-terminal = "0.4.16" itertools = "0.14.0" -jsonwebtoken = "9.3.0" +jsonwebtoken = "9.3.1" lazy_static = "1.5.0" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -mimalloc = { version = "0.1.43", default-features = false } +mimalloc = { version = "0.1.47", default-features = false } mime = "0.3.17" -num_cpus = "1.16.0" +num_cpus = "1.17.0" obkv = "0.3.0" -once_cell = "1.20.2" -ordered-float = "4.6.0" -parking_lot = "0.12.3" +once_cell = "1.21.3" +ordered-float = "5.0.0" +parking_lot = "0.12.4" permissive-json-pointer = { path = "../permissive-json-pointer" } pin-project-lite = "0.2.16" platform-dirs = "0.3.0" @@ -64,44 +64,44 @@ prometheus = { version = "0.14.0", features = ["process"] } rand = "0.8.5" rayon = "1.10.0" regex = "1.11.1" -reqwest = { version = "0.12.12", features = [ +reqwest = { version = "0.12.20", features = [ "rustls-tls", "json", ], default-features = false } -rustls = { version = "0.23.20", features = ["ring"], default-features = false } -rustls-pki-types = { 
version = "1.10.1", features = ["alloc"] } +rustls = { version = "0.23.28", features = ["ring"], default-features = false } +rustls-pki-types = { version = "1.12.0", features = ["alloc"] } rustls-pemfile = "2.2.0" -segment = { version = "0.2.5" } -serde = { version = "1.0.217", features = ["derive"] } -serde_json = { version = "1.0.135", features = ["preserve_order"] } -sha2 = "0.10.8" +segment = { version = "0.2.6" } +serde = { version = "1.0.219", features = ["derive"] } +serde_json = { version = "1.0.140", features = ["preserve_order"] } +sha2 = "0.10.9" siphasher = "1.0.1" slice-group-by = "0.3.1" -static-files = { version = "0.2.4", optional = true } -sysinfo = "0.33.1" -tar = "0.4.43" -tempfile = "3.15.0" -thiserror = "2.0.9" -time = { version = "0.3.37", features = [ +static-files = { version = "0.2.5", optional = true } +sysinfo = "0.35.2" +tar = "0.4.44" +tempfile = "3.20.0" +thiserror = "2.0.12" +time = { version = "0.3.41", features = [ "serde-well-known", "formatting", "parsing", "macros", ] } -tokio = { version = "1.43.1", features = ["full"] } -toml = "0.8.19" -uuid = { version = "1.11.0", features = ["serde", "v4"] } +tokio = { version = "1.45.1", features = ["full"] } +toml = "0.8.23" +uuid = { version = "1.17.0", features = ["serde", "v4"] } serde_urlencoded = "0.7.1" termcolor = "1.4.1" url = { version = "2.5.4", features = ["serde"] } tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["json"] } tracing-trace = { version = "0.1.0", path = "../tracing-trace" } -tracing-actix-web = "0.7.15" +tracing-actix-web = "0.7.18" build-info = { version = "1.7.0", path = "../build-info" } -roaring = "0.10.10" +roaring = "0.10.12" mopa-maintained = "0.2.3" -utoipa = { version = "5.3.1", features = [ +utoipa = { version = "5.4.0", features = [ "actix_extras", "macros", "non_strict_integers", @@ -111,10 +111,13 @@ utoipa = { version = "5.3.1", features = [ "openapi_extensions", ] } utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] } +async-openai = { git = "https://github.com/meilisearch/async-openai", branch = "better-error-handling" } +secrecy = "0.10.3" +actix-web-lab = { version = "0.24.1", default-features = false } [dev-dependencies] actix-rt = "2.10.0" -brotli = "6.0.0" +brotli = "8.0.1" # fixed version due to format breakages in v1.40 insta = { version = "=1.39.0", features = ["redactions"] } manifest-dir-macros = "0.1.18" @@ -122,21 +125,21 @@ maplit = "1.0.2" meili-snap = { path = "../meili-snap" } temp-env = "0.3.6" urlencoding = "2.1.3" -wiremock = "0.6.2" +wiremock = "0.6.3" yaup = "0.3.1" [build-dependencies] -anyhow = { version = "1.0.95", optional = true } -cargo_toml = { version = "0.21.0", optional = true } +anyhow = { version = "1.0.98", optional = true } +cargo_toml = { version = "0.22.1", optional = true } hex = { version = "0.4.3", optional = true } -reqwest = { version = "0.12.12", features = [ +reqwest = { version = "0.12.20", features = [ "blocking", "rustls-tls", ], default-features = false, optional = true } sha-1 = { version = "0.10.1", optional = true } -static-files = { version = "0.2.4", optional = true } -tempfile = { version = "3.15.0", optional = true } -zip = { version = "2.3.0", optional = true } +static-files = { version = "0.2.5", optional = true } +tempfile = { version = "3.20.0", optional = true } +zip = { version = "4.1.0", optional = true } [features] default = ["meilisearch-types/all-tokenizations", "mini-dashboard"] @@ -166,5 +169,5 @@ german = ["meilisearch-types/german"] turkish = 
["meilisearch-types/turkish"] [package.metadata.mini-dashboard] -assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.19/build.zip" -sha1 = "7974430d5277c97f67cf6e95eec6faaac2788834" +assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.20/build.zip" +sha1 = "82a7ddd7bf14bb5323c3d235d2b62892a98b6a59" diff --git a/crates/meilisearch/src/analytics/segment_analytics.rs b/crates/meilisearch/src/analytics/segment_analytics.rs index 3209bba4c..0abc5c817 100644 --- a/crates/meilisearch/src/analytics/segment_analytics.rs +++ b/crates/meilisearch/src/analytics/segment_analytics.rs @@ -197,9 +197,13 @@ struct Infos { experimental_max_number_of_batched_tasks: usize, experimental_limit_batched_tasks_total_size: u64, experimental_network: bool, + experimental_multimodal: bool, + experimental_chat_completions: bool, experimental_get_task_documents_route: bool, experimental_composite_embedders: bool, experimental_embedding_cache_entries: usize, + experimental_no_snapshot_compaction: bool, + experimental_no_edition_2024_for_settings: bool, gpu_enabled: bool, db_path: bool, import_dump: bool, @@ -248,6 +252,7 @@ impl Infos { experimental_max_number_of_batched_tasks, experimental_limit_batched_tasks_total_size, experimental_embedding_cache_entries, + experimental_no_snapshot_compaction, http_addr, master_key: _, env, @@ -283,8 +288,12 @@ impl Infos { ScheduleSnapshot::Enabled(interval) => Some(interval), }; - let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = - indexer_options; + let IndexerOpts { + max_indexing_memory, + max_indexing_threads, + skip_index_budget: _, + experimental_no_edition_2024_for_settings, + } = indexer_options; let RuntimeTogglableFeatures { metrics, @@ -294,6 +303,8 @@ impl Infos { network, get_task_documents_route, composite_embedders, + chat_completions, + multimodal, } = features; // We're going to override every sensible information. 
@@ -312,9 +323,12 @@ impl Infos {
             experimental_enable_logs_route: experimental_enable_logs_route | logs_route,
             experimental_reduce_indexing_memory_usage,
             experimental_network: network,
+            experimental_chat_completions: chat_completions,
+            experimental_multimodal: multimodal,
             experimental_get_task_documents_route: get_task_documents_route,
             experimental_composite_embedders: composite_embedders,
             experimental_embedding_cache_entries,
+            experimental_no_snapshot_compaction,
             gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
             db_path: db_path != PathBuf::from("./data.ms"),
             import_dump: import_dump.is_some(),
@@ -344,6 +358,7 @@ impl Infos {
             ssl_require_auth,
             ssl_resumption,
             ssl_tickets,
+            experimental_no_edition_2024_for_settings,
         }
     }
 }
diff --git a/crates/meilisearch/src/error.rs b/crates/meilisearch/src/error.rs
index b13eb8d7c..91c6c23fa 100644
--- a/crates/meilisearch/src/error.rs
+++ b/crates/meilisearch/src/error.rs
@@ -76,8 +76,10 @@ pub enum MeilisearchHttpError {
     DocumentFormat(#[from] DocumentFormatError),
     #[error(transparent)]
     Join(#[from] JoinError),
-    #[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
+    #[error("Invalid request: missing `hybrid` parameter when `vector` or `media` are present.")]
     MissingSearchHybrid,
+    #[error("Invalid request: both `media` and `vector` parameters are present.")]
+    MediaAndVector,
 }

 impl MeilisearchHttpError {
@@ -111,6 +113,7 @@ impl ErrorCode for MeilisearchHttpError {
             MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
             MeilisearchHttpError::Join(_) => Code::Internal,
             MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
+            MeilisearchHttpError::MediaAndVector => Code::InvalidSearchMediaAndVector,
             MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
                 Code::InvalidMultiSearchFederationOptions
             }
diff --git a/crates/meilisearch/src/extractors/authentication/mod.rs b/crates/meilisearch/src/extractors/authentication/mod.rs
index 28a6d770e..86614f153 100644
--- a/crates/meilisearch/src/extractors/authentication/mod.rs
+++ b/crates/meilisearch/src/extractors/authentication/mod.rs
@@ -4,6 +4,7 @@ use std::marker::PhantomData;
 use std::ops::Deref;
 use std::pin::Pin;

+use actix_web::http::header::AUTHORIZATION;
 use actix_web::web::Data;
 use actix_web::FromRequest;
 pub use error::AuthenticationError;
@@ -94,36 +95,44 @@ impl FromRequest for GuardedData
         _payload: &mut actix_web::dev::Payload,
     ) -> Self::Future {
         match req.app_data::>().cloned() {
-            Some(auth) => match req
-                .headers()
-                .get("Authorization")
-                .map(|type_token| type_token.to_str().unwrap_or_default().splitn(2, ' '))
-            {
-                Some(mut type_token) => match type_token.next() {
-                    Some("Bearer") => {
-                        // TODO: find a less hardcoded way?
-                        let index = req.match_info().get("index_uid");
-                        match type_token.next() {
-                            Some(token) => Box::pin(Self::auth_bearer(
-                                auth,
-                                token.to_string(),
-                                index.map(String::from),
-                                req.app_data::().cloned(),
-                            )),
-                            None => Box::pin(err(AuthenticationError::InvalidToken.into())),
-                        }
-                    }
-                    _otherwise => {
-                        Box::pin(err(AuthenticationError::MissingAuthorizationHeader.into()))
-                    }
-                },
-                None => Box::pin(Self::auth_token(auth, req.app_data::().cloned())),
+            Some(auth) => match extract_token_from_request(req) {
+                Ok(Some(token)) => {
+                    // TODO: find a less hardcoded way?
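+                    // The `index_uid` route segment, when present, lets the
+                    // policy validate a tenant token against that specific index.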
+ let index = req.match_info().get("index_uid"); + Box::pin(Self::auth_bearer( + auth, + token.to_string(), + index.map(String::from), + req.app_data::().cloned(), + )) + } + Ok(None) => Box::pin(Self::auth_token(auth, req.app_data::().cloned())), + Err(e) => Box::pin(err(e.into())), }, None => Box::pin(err(AuthenticationError::IrretrievableState.into())), } } } +pub fn extract_token_from_request( + req: &actix_web::HttpRequest, +) -> Result, AuthenticationError> { + match req + .headers() + .get(AUTHORIZATION) + .map(|type_token| type_token.to_str().unwrap_or_default().splitn(2, ' ')) + { + Some(mut type_token) => match type_token.next() { + Some("Bearer") => match type_token.next() { + Some(token) => Ok(Some(token)), + None => Err(AuthenticationError::InvalidToken), + }, + _otherwise => Err(AuthenticationError::MissingAuthorizationHeader), + }, + None => Ok(None), + } +} + pub trait Policy { fn authenticate( auth: Data, @@ -299,8 +308,8 @@ pub mod policies { auth: &AuthController, token: &str, ) -> Result { - // Only search action can be accessed by a tenant token. - if A != actions::SEARCH { + // Only search and chat actions can be accessed by a tenant token. + if A != actions::SEARCH && A != actions::CHAT_COMPLETIONS { return Ok(TenantTokenOutcome::NotATenantToken); } diff --git a/crates/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs index d83786394..43d7afe0e 100644 --- a/crates/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -37,6 +37,7 @@ use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use meilisearch_auth::{open_auth_store_env, AuthController}; use meilisearch_types::milli::constants::VERSION_MAJOR; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; +use meilisearch_types::milli::progress::{EmbedderStats, Progress}; use meilisearch_types::milli::update::{ default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, }; @@ -236,6 +237,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Arc< instance_features: opt.to_instance_features(), auto_upgrade: opt.experimental_dumpless_upgrade, embedding_cache_cap: opt.experimental_embedding_cache_entries, + experimental_no_snapshot_compaction: opt.experimental_no_snapshot_compaction, }; let binary_version = (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH); @@ -462,6 +464,7 @@ fn import_dump( index_scheduler: &mut IndexScheduler, auth: &mut AuthController, ) -> Result<(), anyhow::Error> { + let progress = Progress::default(); let reader = File::open(dump_path)?; let mut dump_reader = dump::DumpReader::open(reader)?; @@ -495,14 +498,20 @@ fn import_dump( keys.push(key); } - // 3. Import the runtime features and network + // 3. Import the `ChatCompletionSettings`s. + for result in dump_reader.chat_completions_settings()? { + let (name, settings) = result?; + index_scheduler.put_chat_settings(&name, &settings)?; + } + + // 4. 
Import the runtime features and network let features = dump_reader.features()?.unwrap_or_default(); index_scheduler.put_runtime_features(features)?; let network = dump_reader.network()?.cloned().unwrap_or_default(); index_scheduler.put_network(network)?; - // 3.1 Use all cpus to process dump if `max_indexing_threads` not configured + // 4.1 Use all cpus to process dump if `max_indexing_threads` not configured let backup_config; let base_config = index_scheduler.indexer_config(); @@ -519,7 +528,7 @@ fn import_dump( // /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might // try to process tasks while we're trying to import the indexes. - // 4. Import the indexes. + // 5. Import the indexes. for index_reader in dump_reader.indexes()? { let mut index_reader = index_reader?; let metadata = index_reader.metadata(); @@ -532,20 +541,20 @@ fn import_dump( let mut wtxn = index.write_txn()?; let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config); - // 4.1 Import the primary key if there is one. + // 5.1 Import the primary key if there is one. if let Some(ref primary_key) = metadata.primary_key { builder.set_primary_key(primary_key.to_string()); } - // 4.2 Import the settings. + // 5.2 Import the settings. tracing::info!("Importing the settings."); let settings = index_reader.settings()?; apply_settings_to_builder(&settings, &mut builder); - builder - .execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?; + let embedder_stats: Arc = Default::default(); + builder.execute(&|| false, &progress, embedder_stats.clone())?; - // 4.3 Import the documents. - // 4.3.1 We need to recreate the grenad+obkv format accepted by the index. + // 5.3 Import the documents. + // 5.3.1 We need to recreate the grenad+obkv format accepted by the index. tracing::info!("Importing the documents."); let file = tempfile::tempfile()?; let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); @@ -556,11 +565,11 @@ fn import_dump( // This flush the content of the batch builder. let file = builder.into_inner()?.into_inner()?; - // 4.3.2 We feed it to the milli index. + // 5.3.2 We feed it to the milli index. let reader = BufReader::new(file); let reader = DocumentsBatchReader::from_reader(reader)?; - let embedder_configs = index.embedding_configs(&wtxn)?; + let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?; let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?; let builder = milli::update::IndexDocuments::new( @@ -573,6 +582,7 @@ fn import_dump( }, |indexing_step| tracing::trace!("update: {:?}", indexing_step), || false, + &embedder_stats, )?; let builder = builder.with_embedders(embedders); @@ -587,15 +597,15 @@ fn import_dump( index_scheduler.refresh_index_stats(&uid)?; } - // 5. Import the queue + // 6. Import the queue let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; - // 5.1. Import the batches + // 6.1. Import the batches for ret in dump_reader.batches()? { let batch = ret?; index_scheduler_dump.register_dumped_batch(batch)?; } - // 5.2. Import the tasks + // 6.2. Import the tasks for ret in dump_reader.tasks()? { let (task, file) = ret?; index_scheduler_dump.register_dumped_task(task, file)?; diff --git a/crates/meilisearch/src/metrics.rs b/crates/meilisearch/src/metrics.rs index 29c1aeae8..d52e04cc6 100644 --- a/crates/meilisearch/src/metrics.rs +++ b/crates/meilisearch/src/metrics.rs @@ -15,6 +15,33 @@ lazy_static! 
{ "Meilisearch number of degraded search requests" )) .expect("Can't create a metric"); + pub static ref MEILISEARCH_CHAT_SEARCH_REQUESTS: IntCounterVec = register_int_counter_vec!( + opts!( + "meilisearch_chat_search_requests", + "Meilisearch number of search requests performed by the chat route itself" + ), + &["type"] + ) + .expect("Can't create a metric"); + pub static ref MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!( + opts!("meilisearch_chat_prompt_tokens_usage", "Meilisearch Chat Prompt Tokens Usage"), + &["workspace", "model"] + ) + .expect("Can't create a metric"); + pub static ref MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE: IntCounterVec = + register_int_counter_vec!( + opts!( + "meilisearch_chat_completion_tokens_usage", + "Meilisearch Chat Completion Tokens Usage" + ), + &["workspace", "model"] + ) + .expect("Can't create a metric"); + pub static ref MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!( + opts!("meilisearch_chat_total_tokens_usage", "Meilisearch Chat Total Tokens Usage"), + &["workspace", "model"] + ) + .expect("Can't create a metric"); pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes")) .expect("Can't create a metric"); diff --git a/crates/meilisearch/src/option.rs b/crates/meilisearch/src/option.rs index d98b9aa8b..9658352c8 100644 --- a/crates/meilisearch/src/option.rs +++ b/crates/meilisearch/src/option.rs @@ -53,6 +53,8 @@ const MEILI_EXPERIMENTAL_DUMPLESS_UPGRADE: &str = "MEILI_EXPERIMENTAL_DUMPLESS_U const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS"; const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER"; +const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS: &str = + "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE"; const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER"; @@ -62,9 +64,10 @@ const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str = "MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS"; const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str = - "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE"; + "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE"; const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str = "MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES"; +const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_DB_PATH: &str = "./data.ms"; const DEFAULT_HTTP_ADDR: &str = "localhost:7700"; @@ -455,6 +458,15 @@ pub struct Opt { #[serde(default = "default_embedding_cache_entries")] pub experimental_embedding_cache_entries: usize, + /// Experimental no snapshot compaction feature. + /// + /// When enabled, Meilisearch will not compact snapshots during creation. + /// + /// For more information, see . 
+ #[clap(long, env = MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION)] + #[serde(default)] + pub experimental_no_snapshot_compaction: bool, + #[serde(flatten)] #[clap(flatten)] pub indexer_options: IndexerOpts, @@ -559,6 +571,7 @@ impl Opt { experimental_max_number_of_batched_tasks, experimental_limit_batched_tasks_total_size, experimental_embedding_cache_entries, + experimental_no_snapshot_compaction, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); @@ -655,6 +668,10 @@ impl Opt { MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES, experimental_embedding_cache_entries.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION, + experimental_no_snapshot_compaction.to_string(), + ); indexer_options.export_to_env(); } @@ -734,12 +751,25 @@ pub struct IndexerOpts { #[clap(skip)] #[serde(skip)] pub skip_index_budget: bool, + + /// Experimental no edition 2024 for settings feature. For more information, + /// see: + /// + /// Enables the experimental no edition 2024 for settings feature. + #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)] + #[serde(default)] + pub experimental_no_edition_2024_for_settings: bool, } impl IndexerOpts { /// Exports the values to their corresponding env vars if they are not set. pub fn export_to_env(self) { - let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = self; + let IndexerOpts { + max_indexing_memory, + max_indexing_threads, + skip_index_budget: _, + experimental_no_edition_2024_for_settings, + } = self; if let Some(max_indexing_memory) = max_indexing_memory.0 { export_to_env_if_not_present( MEILI_MAX_INDEXING_MEMORY, @@ -752,6 +782,12 @@ impl IndexerOpts { max_indexing_threads.to_string(), ); } + if experimental_no_edition_2024_for_settings { + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS, + experimental_no_edition_2024_for_settings.to_string(), + ); + } } } @@ -770,7 +806,12 @@ impl TryFrom<&IndexerOpts> for IndexerConfig { max_threads: *other.max_indexing_threads, max_positions_per_attributes: None, skip_index_budget: other.skip_index_budget, - ..Default::default() + experimental_no_edition_2024_for_settings: other + .experimental_no_edition_2024_for_settings, + chunk_compression_type: Default::default(), + chunk_compression_level: Default::default(), + documents_chunk_size: Default::default(), + max_nb_chunks: Default::default(), }) } } diff --git a/crates/meilisearch/src/routes/chats/chat_completion_analytics.rs b/crates/meilisearch/src/routes/chats/chat_completion_analytics.rs new file mode 100644 index 000000000..c700894ca --- /dev/null +++ b/crates/meilisearch/src/routes/chats/chat_completion_analytics.rs @@ -0,0 +1,135 @@ +use std::collections::BinaryHeap; + +use serde_json::{json, Value}; + +use crate::analytics::Aggregate; + +#[derive(Default)] +pub struct ChatCompletionAggregator { + // requests + total_received: usize, + total_succeeded: usize, + time_spent: BinaryHeap, + + // chat completion specific metrics + total_messages: usize, + total_streamed_requests: usize, + total_non_streamed_requests: usize, + + // model usage tracking + models_used: std::collections::HashMap, +} + +impl ChatCompletionAggregator { + pub fn from_request(model: &str, message_count: usize, is_stream: bool) -> Self { + let mut models_used = std::collections::HashMap::new(); + models_used.insert(model.to_string(), 1); + + Self { + total_received: 1, + total_succeeded: 0, + time_spent: 
BinaryHeap::new(), + + total_messages: message_count, + total_streamed_requests: if is_stream { 1 } else { 0 }, + total_non_streamed_requests: if is_stream { 0 } else { 1 }, + + models_used, + } + } + + pub fn succeed(&mut self, time_spent: std::time::Duration) { + self.total_succeeded += 1; + self.time_spent.push(time_spent.as_millis() as usize); + } +} + +impl Aggregate for ChatCompletionAggregator { + fn event_name(&self) -> &'static str { + "Chat Completion POST" + } + + fn aggregate(mut self: Box, new: Box) -> Box { + let Self { + total_received, + total_succeeded, + mut time_spent, + total_messages, + total_streamed_requests, + total_non_streamed_requests, + models_used, + .. + } = *new; + + // Aggregate time spent + self.time_spent.append(&mut time_spent); + + // Aggregate counters + self.total_received = self.total_received.saturating_add(total_received); + self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); + self.total_messages = self.total_messages.saturating_add(total_messages); + self.total_streamed_requests = + self.total_streamed_requests.saturating_add(total_streamed_requests); + self.total_non_streamed_requests = + self.total_non_streamed_requests.saturating_add(total_non_streamed_requests); + + // Aggregate model usage + for (model, count) in models_used { + *self.models_used.entry(model).or_insert(0) += count; + } + + self + } + + fn into_event(self: Box) -> Value { + let Self { + total_received, + total_succeeded, + time_spent, + total_messages, + total_streamed_requests, + total_non_streamed_requests, + models_used, + .. + } = *self; + + // Compute time statistics + let time_spent: Vec = time_spent.into_sorted_vec(); + let (max_time, min_time, avg_time) = if time_spent.is_empty() { + (0, 0, 0) + } else { + let max_time = time_spent.last().unwrap_or(&0); + let min_time = time_spent.first().unwrap_or(&0); + let sum: usize = time_spent.iter().sum(); + let avg_time = sum / time_spent.len(); + (*max_time, *min_time, avg_time) + }; + + // Compute average messages per request + let avg_messages_per_request = + if total_received > 0 { total_messages as f64 / total_received as f64 } else { 0.0 }; + + // Compute streaming vs non-streaming proportions + let streaming_ratio = if total_received > 0 { + total_streamed_requests as f64 / total_received as f64 + } else { + 0.0 + }; + + json!({ + "total_received": total_received, + "total_succeeded": total_succeeded, + "time_spent": { + "max": max_time, + "min": min_time, + "avg": avg_time + }, + "total_messages": total_messages, + "avg_messages_per_request": avg_messages_per_request, + "total_streamed_requests": total_streamed_requests, + "total_non_streamed_requests": total_non_streamed_requests, + "streaming_ratio": streaming_ratio, + "models_used": models_used, + }) + } +} diff --git a/crates/meilisearch/src/routes/chats/chat_completions.rs b/crates/meilisearch/src/routes/chats/chat_completions.rs new file mode 100644 index 000000000..4f7087ae8 --- /dev/null +++ b/crates/meilisearch/src/routes/chats/chat_completions.rs @@ -0,0 +1,804 @@ +use std::collections::HashMap; +use std::fmt::Write as _; +use std::mem; +use std::ops::ControlFlow; +use std::time::Duration; + +use actix_web::web::{self, Data}; +use actix_web::{Either, HttpRequest, HttpResponse, Responder}; +use actix_web_lab::sse::{Event, Sse}; +use async_openai::types::{ + ChatCompletionMessageToolCall, ChatCompletionMessageToolCallChunk, + ChatCompletionRequestAssistantMessageArgs, ChatCompletionRequestDeveloperMessage, + 
ChatCompletionRequestDeveloperMessageContent, ChatCompletionRequestMessage, + ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent, + ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent, + ChatCompletionStreamOptions, ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, + ChatCompletionToolType, CreateChatCompletionRequest, CreateChatCompletionStreamResponse, + FinishReason, FunctionCall, FunctionCallStream, FunctionObjectArgs, +}; +use async_openai::Client; +use bumpalo::Bump; +use futures::StreamExt; +use index_scheduler::IndexScheduler; +use meilisearch_auth::AuthController; +use meilisearch_types::error::{Code, ResponseError}; +use meilisearch_types::features::{ + ChatCompletionPrompts as DbChatCompletionPrompts, + ChatCompletionSource as DbChatCompletionSource, SystemRole, +}; +use meilisearch_types::keys::actions; +use meilisearch_types::milli::index::ChatConfig; +use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, TimeBudget}; +use meilisearch_types::{Document, Index}; +use serde::Deserialize; +use serde_json::json; +use tokio::runtime::Handle; +use tokio::sync::mpsc::error::SendError; + +use super::chat_completion_analytics::ChatCompletionAggregator; +use super::config::Config; +use super::errors::{MistralError, OpenAiOutsideError, StreamErrorEvent}; +use super::utils::format_documents; +use super::{ + ChatsParam, MEILI_APPEND_CONVERSATION_MESSAGE_NAME, MEILI_SEARCH_IN_INDEX_FUNCTION_NAME, + MEILI_SEARCH_PROGRESS_NAME, MEILI_SEARCH_SOURCES_NAME, +}; +use crate::analytics::Analytics; +use crate::error::MeilisearchHttpError; +use crate::extractors::authentication::policies::ActionPolicy; +use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _}; +use crate::metrics::{ + MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE, MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE, + MEILISEARCH_CHAT_SEARCH_REQUESTS, MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE, + MEILISEARCH_DEGRADED_SEARCH_REQUESTS, +}; +use crate::routes::chats::utils::SseEventSender; +use crate::routes::indexes::search::search_kind; +use crate::search::{add_search_rules, prepare_search, search_from_kind, SearchQuery}; +use crate::search_queue::SearchQueue; + +pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service(web::resource("").route(web::post().to(chat))); +} + +/// Get a chat completion +async fn chat( + index_scheduler: GuardedData, Data>, + auth_ctrl: web::Data, + chats_param: web::Path, + req: HttpRequest, + search_queue: web::Data, + web::Json(chat_completion): web::Json, + analytics: web::Data, +) -> impl Responder { + let ChatsParam { workspace_uid } = chats_param.into_inner(); + + if chat_completion.stream.unwrap_or(false) { + Either::Right( + streamed_chat( + index_scheduler, + auth_ctrl, + search_queue, + &workspace_uid, + req, + chat_completion, + analytics, + ) + .await, + ) + } else { + Either::Left( + non_streamed_chat( + index_scheduler, + auth_ctrl, + search_queue, + &workspace_uid, + req, + chat_completion, + analytics, + ) + .await, + ) + } +} + +#[derive(Default, Debug, Clone, Copy)] +pub struct FunctionSupport { + /// Defines if we can call the _meiliSearchProgress function + /// to inform the front-end about what we are searching for. + report_progress: bool, + /// Defines if we can call the _meiliSearchSources function + /// to inform the front-end about the sources of the search. 
+ report_sources: bool, + /// Defines if we can call the _meiliAppendConversationMessage + /// function to provide the messages to append into the conversation. + append_to_conversation: bool, +} + +/// Setup search tool in chat completion request +fn setup_search_tool( + index_scheduler: &Data, + filters: &meilisearch_auth::AuthFilter, + chat_completion: &mut CreateChatCompletionRequest, + prompts: &DbChatCompletionPrompts, + system_role: SystemRole, +) -> Result { + let tools = chat_completion.tools.get_or_insert_default(); + for tool in &tools[..] { + match tool.function.name.as_str() { + MEILI_SEARCH_IN_INDEX_FUNCTION_NAME => { + return Err(ResponseError::from_msg( + format!("{MEILI_SEARCH_IN_INDEX_FUNCTION_NAME} function is already defined."), + Code::BadRequest, + )); + } + MEILI_SEARCH_PROGRESS_NAME + | MEILI_SEARCH_SOURCES_NAME + | MEILI_APPEND_CONVERSATION_MESSAGE_NAME => (), + external_function_name => { + return Err(ResponseError::from_msg( + format!("{external_function_name}: External functions are not supported yet."), + Code::UnimplementedExternalFunctionCalling, + )); + } + } + } + + // Remove internal tools used for front-end notifications as they should be hidden from the LLM. + let mut report_progress = false; + let mut report_sources = false; + let mut append_to_conversation = false; + tools.retain(|tool| { + match tool.function.name.as_str() { + MEILI_SEARCH_PROGRESS_NAME => { + report_progress = true; + false + } + MEILI_SEARCH_SOURCES_NAME => { + report_sources = true; + false + } + MEILI_APPEND_CONVERSATION_MESSAGE_NAME => { + append_to_conversation = true; + false + } + _ => true, // keep other tools + } + }); + + let mut index_uids = Vec::new(); + let mut function_description = prompts.search_description.clone(); + index_scheduler.try_for_each_index::<_, ()>(|name, index| { + // Make sure to skip unauthorized indexes + if !filters.is_index_authorized(name) { + return Ok(()); + } + + let rtxn = index.read_txn()?; + let chat_config = index.chat_config(&rtxn)?; + let index_description = chat_config.description; + let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n"); + index_uids.push(name.to_string()); + + Ok(()) + })?; + + let tool = ChatCompletionToolArgs::default() + .r#type(ChatCompletionToolType::Function) + .function( + FunctionObjectArgs::default() + .name(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME) + .description(&function_description) + .parameters(json!({ + "type": "object", + "properties": { + "index_uid": { + "type": "string", + "enum": index_uids, + "description": prompts.search_index_uid_param, + }, + "q": { + // Unfortunately, Mistral does not support an array of types, here. 
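+                                // A nullable type would let the LLM omit the
+                                // query and trigger a placeholder search: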
+ // "type": ["string", "null"], + "type": "string", + "description": prompts.search_q_param, + } + }, + "required": ["index_uid", "q"], + "additionalProperties": false, + })) + .strict(true) + .build() + .unwrap(), + ) + .build() + .unwrap(); + + tools.push(tool); + + let system_message = match system_role { + SystemRole::System => { + ChatCompletionRequestMessage::System(ChatCompletionRequestSystemMessage { + content: ChatCompletionRequestSystemMessageContent::Text(prompts.system.clone()), + name: None, + }) + } + SystemRole::Developer => { + ChatCompletionRequestMessage::Developer(ChatCompletionRequestDeveloperMessage { + content: ChatCompletionRequestDeveloperMessageContent::Text(prompts.system.clone()), + name: None, + }) + } + }; + chat_completion.messages.insert(0, system_message); + + Ok(FunctionSupport { report_progress, report_sources, append_to_conversation }) +} + +/// Process search request and return formatted results +async fn process_search_request( + index_scheduler: &GuardedData< + ActionPolicy<{ actions::CHAT_COMPLETIONS }>, + Data, + >, + auth_ctrl: web::Data, + search_queue: &web::Data, + auth_token: &str, + index_uid: String, + q: Option, +) -> Result<(Index, Vec, String), ResponseError> { + let index = index_scheduler.index(&index_uid)?; + let rtxn = index.static_read_txn()?; + let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?; + let mut query = SearchQuery { q, ..SearchQuery::from(search_parameters) }; + let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate( + auth_ctrl, + auth_token, + Some(index_uid.as_str()), + )?; + + // Tenant token search_rules. + if let Some(search_rules) = auth_filter.get_index_search_rules(&index_uid) { + add_search_rules(&mut query.filter, search_rules); + } + let search_kind = + search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; + + let permit = search_queue.try_get_search_permit().await?; + let features = index_scheduler.features(); + let index_cloned = index.clone(); + let output = tokio::task::spawn_blocking(move || -> Result<_, ResponseError> { + let time_budget = match index_cloned + .search_cutoff(&rtxn) + .map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.clone())))? 
+ { + Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), + None => TimeBudget::default(), + }; + + let (search, _is_finite_pagination, _max_total_hits, _offset) = + prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?; + + search_from_kind(index_uid, search_kind, search) + .map(|(search_results, _)| (rtxn, search_results)) + .map_err(ResponseError::from) + }) + .await; + permit.drop().await; + + let output = output?; + let mut documents = Vec::new(); + if let Ok((ref rtxn, ref search_result)) = output { + MEILISEARCH_CHAT_SEARCH_REQUESTS.with_label_values(&["internal"]).inc(); + if search_result.degraded { + MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); + } + + let fields_ids_map = index.fields_ids_map(rtxn)?; + let displayed_fields = index.displayed_fields_ids(rtxn)?; + for &document_id in &search_result.documents_ids { + let obkv = index.document(rtxn, document_id)?; + let document = match displayed_fields { + Some(ref fields) => obkv_to_json(fields, &fields_ids_map, obkv)?, + None => all_obkv_to_json(obkv, &fields_ids_map)?, + }; + documents.push(document); + } + } + + let (rtxn, search_result) = output?; + let render_alloc = Bump::new(); + let formatted = format_documents(&rtxn, &index, &render_alloc, search_result.documents_ids)?; + let text = formatted.join("\n"); + drop(rtxn); + + Ok((index, documents, text)) +} + +#[allow(unreachable_code, unused_variables)] // will be correctly implemented in the future +async fn non_streamed_chat( + index_scheduler: GuardedData, Data>, + auth_ctrl: web::Data, + search_queue: web::Data, + workspace_uid: &str, + req: HttpRequest, + chat_completion: CreateChatCompletionRequest, + analytics: web::Data, +) -> Result { + index_scheduler.features().check_chat_completions("using the /chats chat completions route")?; + + // Create analytics aggregator + let aggregate = ChatCompletionAggregator::from_request( + &chat_completion.model, + chat_completion.messages.len(), + false, // non_streamed_chat is not streaming + ); + let start_time = std::time::Instant::now(); + + if let Some(n) = chat_completion.n.filter(|&n| n != 1) { + return Err(ResponseError::from_msg( + format!("You tried to specify n = {n} but only single choices are supported (n = 1)."), + Code::UnimplementedMultiChoiceChatCompletions, + )); + } + + return Err(ResponseError::from_msg( + "Non-streamed chat completions is not implemented".to_string(), + Code::UnimplementedNonStreamingChatCompletions, + )); + + let filters = index_scheduler.filters(); + let chat_settings = match index_scheduler.chat_settings(workspace_uid).unwrap() { + Some(settings) => settings, + None => { + return Err(ResponseError::from_msg( + format!("Chat `{workspace_uid}` not found"), + Code::ChatNotFound, + )) + } + }; + + let config = Config::new(&chat_settings); + let client = Client::with_config(config); + let auth_token = extract_token_from_request(&req)?.unwrap(); + let system_role = chat_settings.source.system_role(&chat_completion.model); + // TODO do function support later + let _function_support = setup_search_tool( + &index_scheduler, + filters, + &mut chat_completion, + &chat_settings.prompts, + system_role, + )?; + + let mut response; + loop { + response = client.chat().create(chat_completion.clone()).await.unwrap(); + + let choice = &mut response.choices[0]; + match choice.finish_reason { + Some(FinishReason::ToolCalls) => { + let tool_calls = mem::take(&mut choice.message.tool_calls).unwrap_or_default(); + + let (meili_calls, other_calls): (Vec<_>, Vec<_>) = tool_calls 
+ .into_iter() + .partition(|call| call.function.name == MEILI_SEARCH_IN_INDEX_FUNCTION_NAME); + + chat_completion.messages.push( + ChatCompletionRequestAssistantMessageArgs::default() + .tool_calls(meili_calls.clone()) + .build() + .unwrap() + .into(), + ); + + for call in meili_calls { + let result = match serde_json::from_str(&call.function.arguments) { + Ok(SearchInIndexParameters { index_uid, q }) => process_search_request( + &index_scheduler, + auth_ctrl.clone(), + &search_queue, + auth_token, + index_uid, + q, + ) + .await + .map_err(|e| e.to_string()), + Err(err) => Err(err.to_string()), + }; + + // TODO report documents sources later + let answer = match result { + Ok((_, _documents, text)) => text, + Err(err) => err, + }; + + chat_completion.messages.push(ChatCompletionRequestMessage::Tool( + ChatCompletionRequestToolMessage { + tool_call_id: call.id.clone(), + content: ChatCompletionRequestToolMessageContent::Text(answer), + }, + )); + } + + // Let the client call other tools by themselves + if !other_calls.is_empty() { + response.choices[0].message.tool_calls = Some(other_calls); + break; + } + } + _ => break, + } + } + + // Record success in analytics + let mut aggregate = aggregate; + aggregate.succeed(start_time.elapsed()); + analytics.publish(aggregate, &req); + + Ok(HttpResponse::Ok().json(response)) +} + +async fn streamed_chat( + index_scheduler: GuardedData, Data>, + auth_ctrl: web::Data, + search_queue: web::Data, + workspace_uid: &str, + req: HttpRequest, + mut chat_completion: CreateChatCompletionRequest, + analytics: web::Data, +) -> Result { + index_scheduler.features().check_chat_completions("using the /chats chat completions route")?; + let filters = index_scheduler.filters(); + + if let Some(n) = chat_completion.n.filter(|&n| n != 1) { + return Err(ResponseError::from_msg( + format!("You tried to specify n = {n} but only single choices are supported (n = 1)."), + Code::UnimplementedMultiChoiceChatCompletions, + )); + } + + let chat_settings = match index_scheduler.chat_settings(workspace_uid)? 
{ + Some(settings) => settings, + None => { + return Err(ResponseError::from_msg( + format!("Chat `{workspace_uid}` not found"), + Code::ChatNotFound, + )) + } + }; + + // Create analytics aggregator + let mut aggregate = ChatCompletionAggregator::from_request( + &chat_completion.model, + chat_completion.messages.len(), + true, // streamed_chat is always streaming + ); + let start_time = std::time::Instant::now(); + + let config = Config::new(&chat_settings); + let auth_token = extract_token_from_request(&req)?.unwrap().to_string(); + let system_role = chat_settings.source.system_role(&chat_completion.model); + let function_support = setup_search_tool( + &index_scheduler, + filters, + &mut chat_completion, + &chat_settings.prompts, + system_role, + )?; + + tracing::debug!("Conversation function support: {function_support:?}"); + + let (tx, rx) = tokio::sync::mpsc::channel(10); + let tx = SseEventSender::new(tx); + let workspace_uid = workspace_uid.to_string(); + let _join_handle = Handle::current().spawn(async move { + let client = Client::with_config(config.clone()); + let mut global_tool_calls = HashMap::::new(); + + // Limit the number of internal calls to satisfy the search requests of the LLM + for _ in 0..20 { + let output = run_conversation( + &index_scheduler, + &auth_ctrl, + &workspace_uid, + &search_queue, + &auth_token, + &client, + chat_settings.source, + &mut chat_completion, + &tx, + &mut global_tool_calls, + function_support, + ); + + match output.await { + Ok(ControlFlow::Continue(())) => (), + Ok(ControlFlow::Break(_finish_reason)) => break, + // If the connection is closed we must stop + Err(SendError(_)) => return, + } + } + + let _ = tx.stop().await; + }); + + // Record success in analytics after the stream is set up + aggregate.succeed(start_time.elapsed()); + analytics.publish(aggregate, &req); + + Ok(Sse::from_infallible_receiver(rx).with_retry_duration(Duration::from_secs(10))) +} + +/// Updates the chat completion with the new messages, streams the LLM tokens, +/// and report progress and errors. 
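+///
+/// Returns `ControlFlow::Continue(())` when the LLM requested a Meilisearch
+/// search and the conversation needs another turn, or `ControlFlow::Break`
+/// with the finish reason once streaming can stop.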
+#[allow(clippy::too_many_arguments)] +async fn run_conversation( + index_scheduler: &GuardedData< + ActionPolicy<{ actions::CHAT_COMPLETIONS }>, + Data, + >, + auth_ctrl: &web::Data, + workspace_uid: &str, + search_queue: &web::Data, + auth_token: &str, + client: &Client, + source: DbChatCompletionSource, + chat_completion: &mut CreateChatCompletionRequest, + tx: &SseEventSender, + global_tool_calls: &mut HashMap, + function_support: FunctionSupport, +) -> Result, ()>, SendError> { + use DbChatCompletionSource::*; + + let mut finish_reason = None; + chat_completion.stream_options = match source { + OpenAi | AzureOpenAi => Some(ChatCompletionStreamOptions { include_usage: true }), + Mistral | VLlm => None, + }; + + // safety: unwrap: can only happens if `stream` was set to `false` + let mut response = client.chat().create_stream(chat_completion.clone()).await.unwrap(); + while let Some(result) = response.next().await { + match result { + Ok(resp) => { + if let Some(usage) = resp.usage.as_ref() { + MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE + .with_label_values(&[workspace_uid, &chat_completion.model]) + .inc_by(usage.prompt_tokens as u64); + MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE + .with_label_values(&[workspace_uid, &chat_completion.model]) + .inc_by(usage.completion_tokens as u64); + MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE + .with_label_values(&[workspace_uid, &chat_completion.model]) + .inc_by(usage.total_tokens as u64); + } + let choice = match resp.choices.first() { + Some(choice) => choice, + None => break, + }; + finish_reason = choice.finish_reason; + + let ChatCompletionStreamResponseDelta { ref tool_calls, .. } = &choice.delta; + + match tool_calls { + Some(tool_calls) => { + for chunk in tool_calls { + let ChatCompletionMessageToolCallChunk { + index, + id, + r#type: _, + function, + } = chunk; + let FunctionCallStream { name, arguments } = function.as_ref().unwrap(); + + global_tool_calls + .entry(*index) + .and_modify(|call| { + if call.is_internal() { + call.append(arguments.as_ref().unwrap()) + } + }) + .or_insert_with(|| { + if name.as_deref() == Some(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME) + { + Call::Internal { + id: id.as_ref().unwrap().clone(), + function_name: name.as_ref().unwrap().clone(), + arguments: arguments.as_ref().unwrap().clone(), + } + } else { + Call::External + } + }); + } + } + None => { + if !global_tool_calls.is_empty() { + let (meili_calls, _other_calls): (Vec<_>, Vec<_>) = + mem::take(global_tool_calls) + .into_values() + .flat_map(|call| match call { + Call::Internal { id, function_name: name, arguments } => { + Some(ChatCompletionMessageToolCall { + id, + r#type: Some(ChatCompletionToolType::Function), + function: FunctionCall { name, arguments }, + }) + } + Call::External => None, + }) + .partition(|call| { + call.function.name == MEILI_SEARCH_IN_INDEX_FUNCTION_NAME + }); + + chat_completion.messages.push( + ChatCompletionRequestAssistantMessageArgs::default() + .tool_calls(meili_calls.clone()) + .build() + .unwrap() + .into(), + ); + + handle_meili_tools( + index_scheduler, + auth_ctrl, + search_queue, + auth_token, + tx, + meili_calls, + chat_completion, + &resp, + function_support, + ) + .await?; + } else { + tx.forward_response(&resp).await?; + } + } + } + } + Err(error) => { + let result = match source { + DbChatCompletionSource::Mistral => { + StreamErrorEvent::from_openai_error::(error).await + } + _ => StreamErrorEvent::from_openai_error::(error).await, + }; + let error = result.unwrap_or_else(StreamErrorEvent::from_reqwest_error); + 
tx.send_error(&error).await?; + return Ok(ControlFlow::Break(None)); + } + } + } + + // We must stop if the finish reason is not something we can solve with Meilisearch + match finish_reason { + Some(FinishReason::ToolCalls) => Ok(ControlFlow::Continue(())), + otherwise => Ok(ControlFlow::Break(otherwise)), + } +} + +#[allow(clippy::too_many_arguments)] +async fn handle_meili_tools( + index_scheduler: &GuardedData< + ActionPolicy<{ actions::CHAT_COMPLETIONS }>, + Data, + >, + auth_ctrl: &web::Data, + search_queue: &web::Data, + auth_token: &str, + tx: &SseEventSender, + meili_calls: Vec, + chat_completion: &mut CreateChatCompletionRequest, + resp: &CreateChatCompletionStreamResponse, + FunctionSupport { report_progress, report_sources, append_to_conversation, .. }: FunctionSupport, +) -> Result<(), SendError> { + for call in meili_calls { + if report_progress { + tx.report_search_progress( + resp.clone(), + &call.id, + &call.function.name, + &call.function.arguments, + ) + .await?; + } + + if append_to_conversation { + tx.append_tool_call_conversation_message( + resp.clone(), + call.id.clone(), + call.function.name.clone(), + call.function.arguments.clone(), + ) + .await?; + } + + let mut error = None; + + let result = match serde_json::from_str(&call.function.arguments) { + Ok(SearchInIndexParameters { index_uid, q }) => match process_search_request( + index_scheduler, + auth_ctrl.clone(), + search_queue, + auth_token, + index_uid, + q, + ) + .await + { + Ok(output) => Ok(output), + Err(err) => { + let error_text = format!("the search tool call failed with {err}"); + error = Some(err); + Err(error_text) + } + }, + Err(err) => Err(err.to_string()), + }; + + let answer = match result { + Ok((_index, documents, text)) => { + if report_sources { + tx.report_sources(resp.clone(), &call.id, &documents).await?; + } + text + } + Err(err) => err, + }; + + let tool = ChatCompletionRequestMessage::Tool(ChatCompletionRequestToolMessage { + tool_call_id: call.id.clone(), + content: ChatCompletionRequestToolMessageContent::Text(answer), + }); + + if append_to_conversation { + tx.append_conversation_message(resp.clone(), &tool).await?; + } + + chat_completion.messages.push(tool); + + if let Some(error) = error { + tx.send_error(&StreamErrorEvent::from_response_error(error)).await?; + } + } + + Ok(()) +} + +/// The structure used to aggregate the function calls to make. +#[derive(Debug)] +enum Call { + /// Tool calls to tools that must be managed by Meilisearch internally. + /// Typically the search functions. + Internal { id: String, function_name: String, arguments: String }, + /// Tool calls that we track but only to know that its not our functions. + /// We return the function calls as-is to the end-user. + External, +} + +impl Call { + fn is_internal(&self) -> bool { + matches!(self, Call::Internal { .. }) + } + + /// # Panics + /// + /// - if called on external calls + fn append(&mut self, more: &str) { + match self { + Call::Internal { arguments, .. } => arguments.push_str(more), + Call::External => panic!("Cannot append argument chunks to an external function"), + } + } +} + +#[derive(Deserialize)] +struct SearchInIndexParameters { + /// The index uid to search in. + index_uid: String, + /// The query parameter to use. 
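+    /// When `None`, a placeholder search is performed instead.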
+ q: Option, +} diff --git a/crates/meilisearch/src/routes/chats/config.rs b/crates/meilisearch/src/routes/chats/config.rs new file mode 100644 index 000000000..d4426a97a --- /dev/null +++ b/crates/meilisearch/src/routes/chats/config.rs @@ -0,0 +1,88 @@ +use async_openai::config::{AzureConfig, OpenAIConfig}; +use meilisearch_types::features::ChatCompletionSettings as DbChatSettings; +use reqwest::header::HeaderMap; +use secrecy::SecretString; + +#[derive(Debug, Clone)] +pub enum Config { + OpenAiCompatible(OpenAIConfig), + AzureOpenAiCompatible(AzureConfig), +} + +impl Config { + pub fn new(chat_settings: &DbChatSettings) -> Self { + use meilisearch_types::features::ChatCompletionSource::*; + match chat_settings.source { + OpenAi | Mistral | VLlm => { + let mut config = OpenAIConfig::default(); + if let Some(org_id) = chat_settings.org_id.as_ref() { + config = config.with_org_id(org_id); + } + if let Some(project_id) = chat_settings.project_id.as_ref() { + config = config.with_project_id(project_id); + } + if let Some(api_key) = chat_settings.api_key.as_ref() { + config = config.with_api_key(api_key); + } + let base_url = chat_settings.base_url.as_deref(); + if let Some(base_url) = chat_settings.source.base_url().or(base_url) { + config = config.with_api_base(base_url); + } + Self::OpenAiCompatible(config) + } + AzureOpenAi => { + let mut config = AzureConfig::default(); + if let Some(version) = chat_settings.api_version.as_ref() { + config = config.with_api_version(version); + } + if let Some(deployment_id) = chat_settings.deployment_id.as_ref() { + config = config.with_deployment_id(deployment_id); + } + if let Some(api_key) = chat_settings.api_key.as_ref() { + config = config.with_api_key(api_key); + } + if let Some(base_url) = chat_settings.base_url.as_ref() { + config = config.with_api_base(base_url); + } + Self::AzureOpenAiCompatible(config) + } + } + } +} + +impl async_openai::config::Config for Config { + fn headers(&self) -> HeaderMap { + match self { + Config::OpenAiCompatible(config) => config.headers(), + Config::AzureOpenAiCompatible(config) => config.headers(), + } + } + + fn url(&self, path: &str) -> String { + match self { + Config::OpenAiCompatible(config) => config.url(path), + Config::AzureOpenAiCompatible(config) => config.url(path), + } + } + + fn query(&self) -> Vec<(&str, &str)> { + match self { + Config::OpenAiCompatible(config) => config.query(), + Config::AzureOpenAiCompatible(config) => config.query(), + } + } + + fn api_base(&self) -> &str { + match self { + Config::OpenAiCompatible(config) => config.api_base(), + Config::AzureOpenAiCompatible(config) => config.api_base(), + } + } + + fn api_key(&self) -> &SecretString { + match self { + Config::OpenAiCompatible(config) => config.api_key(), + Config::AzureOpenAiCompatible(config) => config.api_key(), + } + } +} diff --git a/crates/meilisearch/src/routes/chats/errors.rs b/crates/meilisearch/src/routes/chats/errors.rs new file mode 100644 index 000000000..e7fb661ed --- /dev/null +++ b/crates/meilisearch/src/routes/chats/errors.rs @@ -0,0 +1,250 @@ +use async_openai::error::{ApiError, OpenAIError}; +use async_openai::reqwest_eventsource::Error as EventSourceError; +use meilisearch_types::error::ResponseError; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// The error type which is always `error`. +const ERROR_TYPE: &str = "error"; + +/// The error struct returned by the Mistral API. 
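+///
+/// Unlike the OpenAI payload, which nests the details under an `error` key,
+/// Mistral inlines the fields at the top level, hence this dedicated type.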
+/// +/// ```json +/// { +/// "object": "error", +/// "message": "Service tier capacity exceeded for this model.", +/// "type": "invalid_request_error", +/// "param": null, +/// "code": null +/// } +/// ``` +#[derive(Debug, Clone, Deserialize)] +pub struct MistralError { + message: String, + r#type: String, + param: Option, + code: Option, +} + +impl From for StreamErrorEvent { + fn from(error: MistralError) -> Self { + let MistralError { message, r#type, param, code } = error; + StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_owned(), + error: StreamError { r#type, code, message, param, event_id: None }, + } + } +} + +#[derive(Debug, Clone, Deserialize)] +pub struct OpenAiOutsideError { + /// Emitted when an error occurs. + error: OpenAiInnerError, +} + +/// Emitted when an error occurs. +#[derive(Debug, Clone, Deserialize)] +pub struct OpenAiInnerError { + /// The error code. + code: Option, + /// The error message. + message: String, + /// The error parameter. + param: Option, + /// The type of the event. Always `error`. + r#type: String, +} + +impl From for StreamErrorEvent { + fn from(error: OpenAiOutsideError) -> Self { + let OpenAiOutsideError { error: OpenAiInnerError { code, message, param, r#type } } = error; + StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { r#type, code, message, param, event_id: None }, + } + } +} + +/// An error that occurs during the streaming process. +/// +/// It directly comes from the OpenAI API and you can +/// read more about error events on their website: +/// +#[derive(Debug, Serialize, Deserialize)] +pub struct StreamErrorEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The event type, must be error. + pub r#type: String, + /// Details of the error. + pub error: StreamError, +} + +/// Details of the error. +#[derive(Debug, Serialize, Deserialize)] +pub struct StreamError { + /// The type of error (e.g., "invalid_request_error", "server_error"). + pub r#type: String, + /// Error code, if any. + pub code: Option, + /// A human-readable error message. + pub message: String, + /// Parameter related to the error, if any. + pub param: Option, + /// The event_id of the client event that caused the error, if applicable. 
+ pub event_id: Option, +} + +impl StreamErrorEvent { + pub async fn from_openai_error(error: OpenAIError) -> Result + where + E: serde::de::DeserializeOwned, + Self: From, + { + match error { + OpenAIError::Reqwest(e) => Ok(StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { + r#type: "internal_reqwest_error".to_string(), + code: Some("internal".to_string()), + message: e.to_string(), + param: None, + event_id: None, + }, + }), + OpenAIError::ApiError(ApiError { message, r#type, param, code }) => { + Ok(StreamErrorEvent { + r#type: ERROR_TYPE.to_string(), + event_id: Uuid::new_v4().to_string(), + error: StreamError { + r#type: r#type.unwrap_or_else(|| "unknown".to_string()), + code, + message, + param, + event_id: None, + }, + }) + } + OpenAIError::JSONDeserialize(error) => Ok(StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { + r#type: "json_deserialize_error".to_string(), + code: Some("internal".to_string()), + message: error.to_string(), + param: None, + event_id: None, + }, + }), + OpenAIError::FileSaveError(_) | OpenAIError::FileReadError(_) => unreachable!(), + OpenAIError::StreamError(error) => match error { + EventSourceError::InvalidStatusCode(_status_code, response) => { + let error = response.json::().await?; + Ok(StreamErrorEvent::from(error)) + } + EventSourceError::InvalidContentType(_header_value, response) => { + let error = response.json::().await?; + Ok(StreamErrorEvent::from(error)) + } + EventSourceError::Utf8(error) => Ok(StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { + r#type: "invalid_utf8_error".to_string(), + code: None, + message: error.to_string(), + param: None, + event_id: None, + }, + }), + EventSourceError::Parser(error) => Ok(StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { + r#type: "parser_error".to_string(), + code: None, + message: error.to_string(), + param: None, + event_id: None, + }, + }), + EventSourceError::Transport(error) => Ok(StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { + r#type: "transport_error".to_string(), + code: None, + message: error.to_string(), + param: None, + event_id: None, + }, + }), + EventSourceError::InvalidLastEventId(message) => Ok(StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { + r#type: "invalid_last_event_id".to_string(), + code: None, + message, + param: None, + event_id: None, + }, + }), + EventSourceError::StreamEnded => Ok(StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { + r#type: "stream_ended".to_string(), + code: None, + message: "Stream ended".to_string(), + param: None, + event_id: None, + }, + }), + }, + OpenAIError::InvalidArgument(message) => Ok(StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { + r#type: "invalid_argument".to_string(), + code: None, + message, + param: None, + event_id: None, + }, + }), + } + } + + pub fn from_response_error(error: ResponseError) -> Self { + let ResponseError { code, message, .. 
} = error; + StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { + r#type: "response_error".to_string(), + code: Some(code.as_str().to_string()), + message, + param: None, + event_id: None, + }, + } + } + + pub fn from_reqwest_error(error: reqwest::Error) -> Self { + StreamErrorEvent { + event_id: Uuid::new_v4().to_string(), + r#type: ERROR_TYPE.to_string(), + error: StreamError { + r#type: "reqwest_error".to_string(), + code: None, + message: error.to_string(), + param: None, + event_id: None, + }, + } + } +} diff --git a/crates/meilisearch/src/routes/chats/mod.rs b/crates/meilisearch/src/routes/chats/mod.rs new file mode 100644 index 000000000..8633bd496 --- /dev/null +++ b/crates/meilisearch/src/routes/chats/mod.rs @@ -0,0 +1,135 @@ +use actix_web::web::{self, Data}; +use actix_web::HttpResponse; +use deserr::actix_web::AwebQueryParameter; +use deserr::Deserr; +use index_scheduler::IndexScheduler; +use meilisearch_types::deserr::query_params::Param; +use meilisearch_types::deserr::DeserrQueryParamError; +use meilisearch_types::error::deserr_codes::{InvalidIndexLimit, InvalidIndexOffset}; +use meilisearch_types::error::{Code, ResponseError}; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::keys::actions; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use tracing::debug; +use utoipa::{IntoParams, ToSchema}; + +use super::Pagination; +use crate::extractors::authentication::policies::ActionPolicy; +use crate::extractors::authentication::GuardedData; +use crate::routes::PAGINATION_DEFAULT_LIMIT; + +mod chat_completion_analytics; +pub mod chat_completions; +mod config; +mod errors; +pub mod settings; +mod utils; + +/// The function name to report search progress. +/// This function is used to report on what meilisearch is +/// doing which must be used on the frontend to report progress. +const MEILI_SEARCH_PROGRESS_NAME: &str = "_meiliSearchProgress"; +/// The function name to append a conversation message in the user conversation. +/// This function is used to append a conversation message in the user conversation. +/// This must be used on the frontend to keep context of what happened on the +/// Meilisearch-side and keep good context for follow up questions. +const MEILI_APPEND_CONVERSATION_MESSAGE_NAME: &str = "_meiliAppendConversationMessage"; +/// The function name to report sources to the frontend. +/// This function is used to report sources to the frontend. +/// The call id is associated to the one used by the search progress function. +const MEILI_SEARCH_SOURCES_NAME: &str = "_meiliSearchSources"; +/// The *internal* function name to provide to the LLM to search in indexes. +/// This function must not leak to the user as the LLM will call it and the +/// main goal of Meilisearch is to provide an answer to these calls. 
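+/// Calls to it are intercepted by Meilisearch, answered through the internal
+/// search pipeline, and the results are appended to the conversation as tool
+/// messages.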
diff --git a/crates/meilisearch/src/routes/chats/settings.rs b/crates/meilisearch/src/routes/chats/settings.rs
new file mode 100644
index 000000000..38eb0d3c5
--- /dev/null
+++ b/crates/meilisearch/src/routes/chats/settings.rs
@@ -0,0 +1,258 @@
+use actix_web::web::{self, Data};
+use actix_web::HttpResponse;
+use deserr::Deserr;
+use index_scheduler::IndexScheduler;
+use meilisearch_types::deserr::DeserrJsonError;
+use meilisearch_types::error::deserr_codes::*;
+use meilisearch_types::error::{Code, ResponseError};
+use meilisearch_types::features::{
+    ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSettings,
+    ChatCompletionSource as DbChatCompletionSource, DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT,
+    DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT,
+    DEFAULT_CHAT_SYSTEM_PROMPT,
+};
+use meilisearch_types::keys::actions;
+use meilisearch_types::milli::update::Setting;
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+use super::ChatsParam;
+use crate::extractors::authentication::policies::ActionPolicy;
+use crate::extractors::authentication::GuardedData;
+use crate::extractors::sequential_extractor::SeqHandler;
+
+pub fn configure(cfg: &mut web::ServiceConfig) {
+    cfg.service(
+        web::resource("")
+            .route(web::get().to(SeqHandler(get_settings)))
+            .route(web::patch().to(SeqHandler(patch_settings)))
+            .route(web::delete().to(SeqHandler(reset_settings))),
+    );
+}
+
+async fn get_settings(
+    index_scheduler: GuardedData<
+        ActionPolicy<{ actions::CHATS_SETTINGS_GET }>,
+        Data<IndexScheduler>,
+    >,
+    chats_param: web::Path<ChatsParam>,
+) -> Result<HttpResponse, ResponseError> {
+    index_scheduler.features().check_chat_completions("using the /chats/settings route")?;
+
+    let ChatsParam { workspace_uid } = chats_param.into_inner();
+
+    let mut settings = match index_scheduler.chat_settings(&workspace_uid)? {
+        Some(settings) => settings,
+        None => {
+            return Err(ResponseError::from_msg(
+                format!("Chat `{workspace_uid}` not found"),
+                Code::ChatNotFound,
+            ))
+        }
+    };
+    settings.hide_secrets();
+    Ok(HttpResponse::Ok().json(settings))
+}
+
+async fn patch_settings(
+    index_scheduler: GuardedData<
+        ActionPolicy<{ actions::CHATS_SETTINGS_UPDATE }>,
+        Data<IndexScheduler>,
+    >,
+    chats_param: web::Path<ChatsParam>,
+    web::Json(new): web::Json<ChatWorkspaceSettings>,
+) -> Result<HttpResponse, ResponseError> {
+    index_scheduler.features().check_chat_completions("using the /chats/settings route")?;
+    let ChatsParam { workspace_uid } = chats_param.into_inner();
+
+    let old_settings = index_scheduler.chat_settings(&workspace_uid)?.unwrap_or_default();
+
+    let prompts = match new.prompts {
+        Setting::Set(new_prompts) => DbChatCompletionPrompts {
+            system: match new_prompts.system {
+                Setting::Set(new_system) => new_system,
+                Setting::Reset => DEFAULT_CHAT_SYSTEM_PROMPT.to_string(),
+                Setting::NotSet => old_settings.prompts.system,
+            },
+            search_description: match new_prompts.search_description {
+                Setting::Set(new_description) => new_description,
+                Setting::Reset => DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(),
+                Setting::NotSet => old_settings.prompts.search_description,
+            },
+            search_q_param: match new_prompts.search_q_param {
+                Setting::Set(new_description) => new_description,
+                Setting::Reset => DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
+                Setting::NotSet => old_settings.prompts.search_q_param,
+            },
+            search_index_uid_param: match new_prompts.search_index_uid_param {
+                Setting::Set(new_description) => new_description,
+                Setting::Reset => DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
+                Setting::NotSet => old_settings.prompts.search_index_uid_param,
+            },
+        },
+        Setting::Reset => DbChatCompletionPrompts::default(),
+        Setting::NotSet => old_settings.prompts,
+    };
+
+    let mut settings = ChatCompletionSettings {
+        source: match new.source {
+            Setting::Set(new_source) => new_source.into(),
+            Setting::Reset => DbChatCompletionSource::default(),
+            Setting::NotSet => old_settings.source,
+        },
+        org_id: match new.org_id {
+            Setting::Set(new_org_id) => Some(new_org_id),
+            Setting::Reset => None,
+            Setting::NotSet => old_settings.org_id,
+        },
+        project_id: match new.project_id {
+            Setting::Set(new_project_id) => Some(new_project_id),
+            Setting::Reset => None,
+            Setting::NotSet => old_settings.project_id,
+        },
+        api_version: match new.api_version {
+            Setting::Set(new_api_version) => Some(new_api_version),
+            Setting::Reset => None,
+            Setting::NotSet => old_settings.api_version,
+        },
+        deployment_id: match new.deployment_id {
+            Setting::Set(new_deployment_id) => Some(new_deployment_id),
+            Setting::Reset => None,
+            Setting::NotSet => old_settings.deployment_id,
+        },
+        base_url: match new.base_url {
+            Setting::Set(new_base_url) => Some(new_base_url),
+            Setting::Reset => None,
+            Setting::NotSet => old_settings.base_url,
+        },
+        api_key: match new.api_key {
+            Setting::Set(new_api_key) => Some(new_api_key),
+            Setting::Reset => None,
+            Setting::NotSet => old_settings.api_key,
+        },
+        prompts,
+    };
+
+    // TODO send analytics
+    // analytics.publish(
+    //     PatchNetworkAnalytics {
+    //         network_size: merged_remotes.len(),
+    //         network_has_self: merged_self.is_some(),
+    //     },
+    //     &req,
+    // );
+
+    settings.validate()?;
+    index_scheduler.put_chat_settings(&workspace_uid, &settings)?;
+
+    settings.hide_secrets();
+
+    Ok(HttpResponse::Ok().json(settings))
+}
+
+async fn reset_settings(
+    index_scheduler: GuardedData<
+        ActionPolicy<{ actions::CHATS_SETTINGS_UPDATE }>,
+        Data<IndexScheduler>,
+    >,
+    chats_param: web::Path<ChatsParam>,
+) -> Result<HttpResponse, ResponseError> {
+    index_scheduler.features().check_chat_completions("using the /chats/settings route")?;
+
+    let ChatsParam { workspace_uid } = chats_param.into_inner();
+    if index_scheduler.chat_settings(&workspace_uid)?.is_some() {
+        let settings = Default::default();
+        index_scheduler.put_chat_settings(&workspace_uid, &settings)?;
+        Ok(HttpResponse::Ok().json(settings))
+    } else {
+        Err(ResponseError::from_msg(
+            format!("Chat `{workspace_uid}` not found"),
+            Code::ChatNotFound,
+        ))
+    }
+}
+
+#[derive(Debug, Clone, Deserialize, Deserr, ToSchema)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+#[serde(deny_unknown_fields, rename_all = "camelCase")]
+#[schema(rename_all = "camelCase")]
+pub struct ChatWorkspaceSettings {
+    #[serde(default)]
+    #[deserr(default)]
+    #[schema(value_type = Option<ChatCompletionSource>)]
+    pub source: Setting<ChatCompletionSource>,
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(value_type = Option<String>, example = json!("dcba4321..."))]
+    pub org_id: Setting<String>,
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(value_type = Option<String>, example = json!("4321dcba..."))]
+    pub project_id: Setting<String>,
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(value_type = Option<String>, example = json!("2024-02-01"))]
+    pub api_version: Setting<String>,
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(value_type = Option<String>, example = json!("1234abcd..."))]
+    pub deployment_id: Setting<String>,
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(value_type = Option<String>, example = json!("https://api.mistral.ai/v1"))]
+    pub base_url: Setting<String>,
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(value_type = Option<String>, example = json!("abcd1234..."))]
+    pub api_key: Setting<String>,
+    #[serde(default)]
+    #[deserr(default)]
+    #[schema(inline, value_type = Option<ChatPrompts>)]
+    pub prompts: Setting<ChatPrompts>,
+}
+
+#[derive(Default, Debug, Clone, Copy, Serialize, Deserialize, Deserr, ToSchema)]
+#[serde(deny_unknown_fields, rename_all = "camelCase")]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+pub enum ChatCompletionSource {
+    #[default]
+    OpenAi,
+    Mistral,
+    AzureOpenAi,
+    VLlm,
+}
+
+impl From<ChatCompletionSource> for DbChatCompletionSource {
+    fn from(source: ChatCompletionSource) -> Self {
+        use ChatCompletionSource::*;
+        match source {
+            OpenAi => DbChatCompletionSource::OpenAi,
+            Mistral => DbChatCompletionSource::Mistral,
+            AzureOpenAi => DbChatCompletionSource::AzureOpenAi,
+            VLlm => DbChatCompletionSource::VLlm,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Deserialize, Deserr, ToSchema)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+#[serde(deny_unknown_fields, rename_all = "camelCase")]
+#[schema(rename_all = "camelCase")]
+pub struct ChatPrompts {
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(value_type = Option<String>, example = json!("You are a helpful assistant..."))]
+    pub system: Setting<String>,
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(value_type = Option<String>, example = json!("This is the search function..."))]
+    pub search_description: Setting<String>,
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(value_type = Option<String>, example = json!("This is query parameter..."))]
+    pub search_q_param: Setting<String>,
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(value_type = Option<String>, example = json!("This is index you want to search in..."))]
+    pub search_index_uid_param: Setting<String>,
+}
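The `Setting::Set` / `Setting::Reset` / `Setting::NotSet` handling above follows the usual Meilisearch settings convention: a JSON value sets, an explicit `null` resets to the default, and an omitted field keeps its previous value. A hypothetical call against a workspace named `my-assistant`:

```sh
# `source` is set, `baseUrl` is reset via null, and every omitted field
# (apiKey, prompts, ...) keeps its stored value.
curl -X PATCH 'http://localhost:7700/chats/my-assistant/settings' \
  -H 'Authorization: Bearer MASTER_KEY' \
  -H 'Content-Type: application/json' \
  -d '{ "source": "mistral", "baseUrl": null }'
```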
diff --git a/crates/meilisearch/src/routes/chats/utils.rs b/crates/meilisearch/src/routes/chats/utils.rs
new file mode 100644
index 000000000..61961bd4b
--- /dev/null
+++ b/crates/meilisearch/src/routes/chats/utils.rs
@@ -0,0 +1,253 @@
+use std::cell::RefCell;
+use std::sync::RwLock;
+
+use actix_web_lab::sse::{self, Event};
+use async_openai::types::{
+    ChatChoiceStream, ChatCompletionMessageToolCall, ChatCompletionMessageToolCallChunk,
+    ChatCompletionRequestAssistantMessage, ChatCompletionRequestMessage,
+    ChatCompletionStreamResponseDelta, ChatCompletionToolType, CreateChatCompletionStreamResponse,
+    FunctionCall, FunctionCallStream, Role,
+};
+use bumpalo::Bump;
+use meilisearch_types::error::{Code, ResponseError};
+use meilisearch_types::heed::RoTxn;
+use meilisearch_types::milli::index::ChatConfig;
+use meilisearch_types::milli::prompt::{Prompt, PromptData};
+use meilisearch_types::milli::update::new::document::DocumentFromDb;
+use meilisearch_types::milli::{
+    DocumentId, FieldIdMapWithMetadata, GlobalFieldsIdsMap, MetadataBuilder,
+};
+use meilisearch_types::{Document, Index};
+use serde::Serialize;
+use tokio::sync::mpsc::error::SendError;
+use tokio::sync::mpsc::Sender;
+
+use super::errors::StreamErrorEvent;
+use super::MEILI_APPEND_CONVERSATION_MESSAGE_NAME;
+use crate::routes::chats::{MEILI_SEARCH_PROGRESS_NAME, MEILI_SEARCH_SOURCES_NAME};
+
+pub struct SseEventSender(Sender<Event>);
+
+impl SseEventSender {
+    pub fn new(sender: Sender<Event>) -> Self {
+        Self(sender)
+    }
+
+    /// Ask the front-end user to append this tool *call* to the conversation
+    pub async fn append_tool_call_conversation_message(
+        &self,
+        resp: CreateChatCompletionStreamResponse,
+        call_id: String,
+        function_name: String,
+        function_arguments: String,
+    ) -> Result<(), SendError<Event>> {
+        #[allow(deprecated)] // function_call
+        let message =
+            ChatCompletionRequestMessage::Assistant(ChatCompletionRequestAssistantMessage {
+                content: None,
+                refusal: None,
+                name: None,
+                audio: None,
+                tool_calls: Some(vec![ChatCompletionMessageToolCall {
+                    id: call_id,
+                    r#type: Some(ChatCompletionToolType::Function),
+                    function: FunctionCall { name: function_name, arguments: function_arguments },
+                }]),
+                function_call: None,
+            });
+
+        self.append_conversation_message(resp, &message).await
+    }
+
+    /// Ask the front-end user to append this message to the conversation
+    pub async fn append_conversation_message(
+        &self,
+        mut resp: CreateChatCompletionStreamResponse,
+        message: &ChatCompletionRequestMessage,
+    ) -> Result<(), SendError<Event>> {
+        let call_text = serde_json::to_string(message).unwrap();
+        let tool_call = ChatCompletionMessageToolCallChunk {
+            index: 0,
+            id: Some(uuid::Uuid::new_v4().to_string()),
+            r#type: Some(ChatCompletionToolType::Function),
+            function: Some(FunctionCallStream {
+                name: Some(MEILI_APPEND_CONVERSATION_MESSAGE_NAME.to_string()),
+                arguments: Some(call_text),
+            }),
+        };
+
+        resp.choices[0] = ChatChoiceStream {
+            index: 0,
+            #[allow(deprecated)] // function_call
+            delta: ChatCompletionStreamResponseDelta {
+                content: None,
+                function_call: None,
+                tool_calls: Some(vec![tool_call]),
+                role: Some(Role::Assistant),
+                refusal: None,
+            },
+            finish_reason: None,
+            logprobs: None,
+        };
+
+        self.send_json(&resp).await
+    }
+
+    pub async fn report_search_progress(
+        &self,
+        mut resp: CreateChatCompletionStreamResponse,
+        call_id: &str,
+        function_name: &str,
+        function_arguments: &str,
+    ) -> Result<(), SendError<Event>> {
+        #[derive(Debug, Clone, Serialize)]
+        /// Provides information about the current Meilisearch search operation.
+        struct MeiliSearchProgress<'a> {
+            /// The call ID to track the sources of the search.
+            call_id: &'a str,
+            /// The name of the function we are executing.
+            function_name: &'a str,
+            /// The arguments of the function we are executing, encoded in JSON.
+            function_arguments: &'a str,
+        }
+
+        let progress = MeiliSearchProgress { call_id, function_name, function_arguments };
+        let call_text = serde_json::to_string(&progress).unwrap();
+        let tool_call = ChatCompletionMessageToolCallChunk {
+            index: 0,
+            id: Some(uuid::Uuid::new_v4().to_string()),
+            r#type: Some(ChatCompletionToolType::Function),
+            function: Some(FunctionCallStream {
+                name: Some(MEILI_SEARCH_PROGRESS_NAME.to_string()),
+                arguments: Some(call_text),
+            }),
+        };
+
+        resp.choices[0] = ChatChoiceStream {
+            index: 0,
+            #[allow(deprecated)] // function_call
+            delta: ChatCompletionStreamResponseDelta {
+                content: None,
+                function_call: None,
+                tool_calls: Some(vec![tool_call]),
+                role: Some(Role::Assistant),
+                refusal: None,
+            },
+            finish_reason: None,
+            logprobs: None,
+        };
+
+        self.send_json(&resp).await
+    }
+
+    pub async fn report_sources(
+        &self,
+        mut resp: CreateChatCompletionStreamResponse,
+        call_id: &str,
+        documents: &[Document],
+    ) -> Result<(), SendError<Event>> {
+        #[derive(Debug, Clone, Serialize)]
+        /// Provides sources of the search.
+        struct MeiliSearchSources<'a> {
+            /// The call ID to track the original search associated to those sources.
+            call_id: &'a str,
+            /// The documents associated with the search (call_id).
+            /// Only the displayed attributes of the documents are returned.
+            sources: &'a [Document],
+        }
+
+        let sources = MeiliSearchSources { call_id, sources: documents };
+        let call_text = serde_json::to_string(&sources).unwrap();
+        let tool_call = ChatCompletionMessageToolCallChunk {
+            index: 0,
+            id: Some(uuid::Uuid::new_v4().to_string()),
+            r#type: Some(ChatCompletionToolType::Function),
+            function: Some(FunctionCallStream {
+                name: Some(MEILI_SEARCH_SOURCES_NAME.to_string()),
+                arguments: Some(call_text),
+            }),
+        };
+
+        resp.choices[0] = ChatChoiceStream {
+            index: 0,
+            #[allow(deprecated)] // function_call
+            delta: ChatCompletionStreamResponseDelta {
+                content: None,
+                function_call: None,
+                tool_calls: Some(vec![tool_call]),
+                role: Some(Role::Assistant),
+                refusal: None,
+            },
+            finish_reason: None,
+            logprobs: None,
+        };
+
+        self.send_json(&resp).await
+    }
+
+    pub async fn forward_response(
+        &self,
+        resp: &CreateChatCompletionStreamResponse,
+    ) -> Result<(), SendError<Event>> {
+        self.send_json(resp).await
+    }
+
+    pub async fn send_error(&self, error: &StreamErrorEvent) -> Result<(), SendError<Event>> {
+        self.send_json(error).await
+    }
+
+    pub async fn stop(self) -> Result<(), SendError<Event>> {
+        // It is the way OpenAI signals the correct end of a stream.
+        //
+        const DONE_DATA: &str = "[DONE]";
+        self.0.send(Event::Data(sse::Data::new(DONE_DATA))).await
+    }
+
+    async fn send_json<S: Serialize>(&self, data: &S) -> Result<(), SendError<Event>> {
+        self.0.send(Event::Data(sse::Data::new_json(data).unwrap())).await
+    }
+}
+
+/// Format documents based on the provided template and maximum bytes.
+///
+/// This formatting function is usually used to generate a summary of the documents for LLMs.
+pub fn format_documents<'doc>(
+    rtxn: &RoTxn<'_>,
+    index: &Index,
+    doc_alloc: &'doc Bump,
+    internal_docids: Vec<DocumentId>,
+) -> Result<Vec<&'doc str>, ResponseError> {
+    let ChatConfig { prompt: PromptData { template, max_bytes }, .. } = index.chat_config(rtxn)?;
+
+    let prompt = Prompt::new(template, max_bytes).unwrap();
+    let fid_map = index.fields_ids_map(rtxn)?;
+    let metadata_builder = MetadataBuilder::from_index(index, rtxn)?;
+    let fid_map_with_meta = FieldIdMapWithMetadata::new(fid_map.clone(), metadata_builder);
+    let global = RwLock::new(fid_map_with_meta);
+    let gfid_map = RefCell::new(GlobalFieldsIdsMap::new(&global));
+
+    let external_ids: Vec<String> = index
+        .external_id_of(rtxn, internal_docids.iter().copied())?
+        .into_iter()
+        .collect::<Result<_, _>>()?;
+
+    let mut renders = Vec::new();
+    for (docid, external_docid) in internal_docids.into_iter().zip(external_ids) {
+        let document = match DocumentFromDb::new(docid, rtxn, index, &fid_map)? {
+            Some(doc) => doc,
+            None => unreachable!("Document with internal ID {docid} not found"),
+        };
+        let text = match prompt.render_document(&external_docid, document, &gfid_map, doc_alloc) {
+            Ok(text) => text,
+            Err(err) => {
+                return Err(ResponseError::from_msg(
+                    err.to_string(),
+                    Code::InvalidChatSettingDocumentTemplate,
+                ))
+            }
+        };
+        renders.push(text);
+    }
+
+    Ok(renders)
+}
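Putting `SseEventSender` together, the wire traffic is a plain SSE stream: forwarded `chat.completion.chunk` payloads, tool-call chunks named after the `_meili*` functions above, then the OpenAI-style terminator sent by `stop`. A sketch with heavily abbreviated payloads:

```sh
cat <<'EOF'
data: {"object":"chat.completion.chunk","choices":[{"delta":{"content":"Hello"}}]}
data: {"object":"chat.completion.chunk","choices":[{"delta":{"tool_calls":[{"function":{"name":"_meiliSearchProgress"}}]}}]}
data: [DONE]
EOF
```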
diff --git a/crates/meilisearch/src/routes/export.rs b/crates/meilisearch/src/routes/export.rs
new file mode 100644
index 000000000..a4b6720d1
--- /dev/null
+++ b/crates/meilisearch/src/routes/export.rs
@@ -0,0 +1,183 @@
+use std::collections::BTreeMap;
+use std::convert::Infallible;
+use std::str::FromStr as _;
+
+use actix_web::web::{self, Data};
+use actix_web::{HttpRequest, HttpResponse};
+use byte_unit::Byte;
+use deserr::actix_web::AwebJson;
+use deserr::Deserr;
+use index_scheduler::IndexScheduler;
+use meilisearch_types::deserr::DeserrJsonError;
+use meilisearch_types::error::deserr_codes::*;
+use meilisearch_types::error::ResponseError;
+use meilisearch_types::index_uid_pattern::IndexUidPattern;
+use meilisearch_types::keys::actions;
+use meilisearch_types::tasks::{ExportIndexSettings as DbExportIndexSettings, KindWithContent};
+use serde::Serialize;
+use serde_json::Value;
+use tracing::debug;
+use utoipa::{OpenApi, ToSchema};
+
+use crate::analytics::Analytics;
+use crate::extractors::authentication::policies::ActionPolicy;
+use crate::extractors::authentication::GuardedData;
+use crate::routes::export_analytics::ExportAnalytics;
+use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
+use crate::Opt;
+
+#[derive(OpenApi)]
+#[openapi(
+    paths(export),
+    tags((
+        name = "Export",
+        description = "The `/export` route allows you to trigger an export process to a remote Meilisearch instance.",
+        external_docs(url = "https://www.meilisearch.com/docs/reference/api/export"),
+    )),
+)]
+pub struct ExportApi;
+
+pub fn configure(cfg: &mut web::ServiceConfig) {
+    cfg.service(web::resource("").route(web::post().to(export)));
+}
+
+#[utoipa::path(
+    post,
+    path = "",
+    tag = "Export",
+    security(("Bearer" = ["export", "*"])),
+    responses(
+        (status = 202, description = "Export successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
+            {
+                "taskUid": 1,
+                "status": "enqueued",
+                "type": "export",
+                "enqueuedAt": "2021-08-11T09:25:53.000000Z"
+            })),
+        (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
+            {
+                "message": "The Authorization header is missing. It must use the bearer authorization method.",
+                "code": "missing_authorization_header",
+                "type": "auth",
+                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
+            }
+        )),
+    )
+)]
+async fn export(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::EXPORT }>, Data<IndexScheduler>>,
+    export: AwebJson<Export, DeserrJsonError>,
+    req: HttpRequest,
+    opt: web::Data<Opt>,
+    analytics: Data<Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    let export = export.into_inner();
+    debug!(returns = ?export, "Trigger export");
+
+    let analytics_aggregate = ExportAnalytics::from_export(&export);
+
+    let Export { url, api_key, payload_size, indexes } = export;
+
+    let indexes = match indexes {
+        Some(indexes) => indexes
+            .into_iter()
+            .map(|(pattern, ExportIndexSettings { filter, override_settings })| {
+                (pattern, DbExportIndexSettings { filter, override_settings })
+            })
+            .collect(),
+        None => BTreeMap::from([(
+            IndexUidPattern::new_unchecked("*"),
+            DbExportIndexSettings::default(),
+        )]),
+    };
+
+    let task = KindWithContent::Export {
+        url,
+        api_key,
+        payload_size: payload_size.map(|ByteWithDeserr(bytes)| bytes),
+        indexes,
+    };
+    let uid = get_task_id(&req, &opt)?;
+    let dry_run = is_dry_run(&req, &opt)?;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
+            .await??
+            .into();
+
+    analytics.publish(analytics_aggregate, &req);
+
+    Ok(HttpResponse::Ok().json(task))
+}
+
+#[derive(Debug, Deserr, ToSchema, Serialize)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+#[serde(rename_all = "camelCase")]
+#[schema(rename_all = "camelCase")]
+pub struct Export {
+    #[schema(value_type = Option<String>, example = json!("https://ms-1234.heaven.meilisearch.com"))]
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    pub url: String,
+    #[schema(value_type = Option<String>, example = json!("1234abcd"))]
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    pub api_key: Option<String>,
+    #[schema(value_type = Option<String>, example = json!("24MiB"))]
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    pub payload_size: Option<ByteWithDeserr>,
+    #[schema(value_type = Option<BTreeMap<String, ExportIndexSettings>>, example = json!({ "*": { "filter": null } }))]
+    #[deserr(default)]
+    #[serde(default)]
+    pub indexes: Option<BTreeMap<IndexUidPattern, ExportIndexSettings>>,
+}
+
+/// A wrapper around the `Byte` type that implements `Deserr`.
+#[derive(Debug, Serialize)]
+#[serde(transparent)]
+pub struct ByteWithDeserr(pub Byte);
+
+impl<E> deserr::Deserr<E> for ByteWithDeserr
+where
+    E: deserr::DeserializeError,
+{
+    fn deserialize_from_value<V: deserr::IntoValue>(
+        value: deserr::Value<V>,
+        location: deserr::ValuePointerRef,
+    ) -> Result<Self, E> {
+        use deserr::{ErrorKind, Value, ValueKind};
+        match value {
+            Value::Integer(integer) => Ok(ByteWithDeserr(Byte::from_u64(integer))),
+            Value::String(string) => Byte::from_str(&string).map(ByteWithDeserr).map_err(|e| {
+                deserr::take_cf_content(E::error::<Infallible>(
+                    None,
+                    ErrorKind::Unexpected { msg: e.to_string() },
+                    location,
+                ))
+            }),
+            actual => Err(deserr::take_cf_content(E::error(
+                None,
+                ErrorKind::IncorrectValueKind {
+                    actual,
+                    accepted: &[ValueKind::Integer, ValueKind::String],
+                },
+                location,
+            ))),
+        }
+    }
+}
+
+#[derive(Debug, Deserr, ToSchema, Serialize)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+#[serde(rename_all = "camelCase")]
+#[schema(rename_all = "camelCase")]
+pub struct ExportIndexSettings {
+    #[schema(value_type = Option<String>, example = json!("genres = action"))]
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    pub filter: Option<Value>,
+    #[schema(value_type = Option<bool>, example = json!(true))]
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError)]
+    pub override_settings: bool,
+}
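A hypothetical trigger of the route; note that `payloadSize` goes through `ByteWithDeserr` above, so both a raw byte count and a human-readable string such as `"24MiB"` are accepted:

```sh
curl -X POST 'http://localhost:7700/export' \
  -H 'Authorization: Bearer MASTER_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "url": "https://ms-1234.heaven.meilisearch.com",
    "apiKey": "1234abcd",
    "payloadSize": "24MiB",
    "indexes": { "movies-*": { "filter": "genres = action", "overrideSettings": true } }
  }'
```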
diff --git a/crates/meilisearch/src/routes/export_analytics.rs b/crates/meilisearch/src/routes/export_analytics.rs
new file mode 100644
index 000000000..a2f0a129d
--- /dev/null
+++ b/crates/meilisearch/src/routes/export_analytics.rs
@@ -0,0 +1,111 @@
+use url::Url;
+
+use crate::analytics::Aggregate;
+use crate::routes::export::Export;
+
+#[derive(Default)]
+pub struct ExportAnalytics {
+    total_received: usize,
+    has_api_key: bool,
+    sum_exports_meilisearch_cloud: usize,
+    sum_index_patterns: usize,
+    sum_patterns_with_filter: usize,
+    sum_patterns_with_override_settings: usize,
+    payload_sizes: Vec<u64>,
+}
+
+impl ExportAnalytics {
+    pub fn from_export(export: &Export) -> Self {
+        let Export { url, api_key, payload_size, indexes } = export;
+
+        let url = Url::parse(url).ok();
+        let is_meilisearch_cloud = url.as_ref().and_then(Url::host_str).is_some_and(|host| {
+            host.ends_with("meilisearch.dev")
+                || host.ends_with("meilisearch.com")
+                || host.ends_with("meilisearch.io")
+        });
+        let has_api_key = api_key.is_some();
+        let index_patterns_count = indexes.as_ref().map_or(0, |indexes| indexes.len());
+        let patterns_with_filter_count = indexes.as_ref().map_or(0, |indexes| {
+            indexes.values().filter(|settings| settings.filter.is_some()).count()
+        });
+        let patterns_with_override_settings_count = indexes.as_ref().map_or(0, |indexes| {
+            indexes.values().filter(|settings| settings.override_settings).count()
+        });
+        let payload_sizes =
+            if let Some(crate::routes::export::ByteWithDeserr(byte_size)) = payload_size {
+                vec![byte_size.as_u64()]
+            } else {
+                vec![]
+            };
+
+        Self {
+            total_received: 1,
+            has_api_key,
+            sum_exports_meilisearch_cloud: is_meilisearch_cloud as usize,
+            sum_index_patterns: index_patterns_count,
+            sum_patterns_with_filter: patterns_with_filter_count,
+            sum_patterns_with_override_settings: patterns_with_override_settings_count,
+            payload_sizes,
+        }
+    }
+}
+
+impl Aggregate for ExportAnalytics {
+    fn event_name(&self) -> &'static str {
+        "Export Triggered"
+    }
+
+    fn aggregate(mut self: Box<Self>, other: Box<Self>) -> Box<Self> {
+        self.total_received += other.total_received;
+        self.has_api_key |= other.has_api_key;
+        self.sum_exports_meilisearch_cloud += other.sum_exports_meilisearch_cloud;
+        self.sum_index_patterns += other.sum_index_patterns;
+        self.sum_patterns_with_filter += other.sum_patterns_with_filter;
+        self.sum_patterns_with_override_settings += other.sum_patterns_with_override_settings;
+        self.payload_sizes.extend(other.payload_sizes);
+        self
+    }
+
+    fn into_event(self: Box<Self>) -> serde_json::Value {
+        let avg_payload_size = if self.payload_sizes.is_empty() {
+            None
+        } else {
+            Some(self.payload_sizes.iter().sum::<u64>() / self.payload_sizes.len() as u64)
+        };
+
+        let avg_exports_meilisearch_cloud = if self.total_received == 0 {
+            None
+        } else {
+            Some(self.sum_exports_meilisearch_cloud as f64 / self.total_received as f64)
+        };
+
+        let avg_index_patterns = if self.total_received == 0 {
+            None
+        } else {
+            Some(self.sum_index_patterns as f64 / self.total_received as f64)
+        };
+
+        let avg_patterns_with_filter = if self.total_received == 0 {
+            None
+        } else {
+            Some(self.sum_patterns_with_filter as f64 / self.total_received as f64)
+        };
+
+        let avg_patterns_with_override_settings = if self.total_received == 0 {
+            None
+        } else {
+            Some(self.sum_patterns_with_override_settings as f64 / self.total_received as f64)
+        };
+
+        serde_json::json!({
+            "total_received": self.total_received,
+            "has_api_key": self.has_api_key,
+            "avg_exports_meilisearch_cloud": avg_exports_meilisearch_cloud,
+            "avg_index_patterns": avg_index_patterns,
+            "avg_patterns_with_filter": avg_patterns_with_filter,
+            "avg_patterns_with_override_settings": avg_patterns_with_override_settings,
+            "avg_payload_size": avg_payload_size,
+        })
+    }
+}
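For reference, a hypothetical aggregated `Export Triggered` event after two triggers (one towards `*.meilisearch.com` with a 24MiB payload size, one self-hosted without one); the sums above become averages in `into_event`:

```sh
cat <<'EOF'
{
  "total_received": 2,
  "has_api_key": true,
  "avg_exports_meilisearch_cloud": 0.5,
  "avg_index_patterns": 1.0,
  "avg_patterns_with_filter": 0.5,
  "avg_patterns_with_override_settings": 0.0,
  "avg_payload_size": 25165824
}
EOF
```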
diff --git a/crates/meilisearch/src/routes/features.rs b/crates/meilisearch/src/routes/features.rs
index eb8e7ac04..1a1f89b2d 100644
--- a/crates/meilisearch/src/routes/features.rs
+++ b/crates/meilisearch/src/routes/features.rs
@@ -53,6 +53,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
             network: Some(false),
             get_task_documents_route: Some(false),
             composite_embedders: Some(false),
+            chat_completions: Some(false),
+            multimodal: Some(false),
         })),
         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
             {
@@ -97,6 +99,10 @@ pub struct RuntimeTogglableFeatures {
     pub get_task_documents_route: Option<bool>,
     #[deserr(default)]
     pub composite_embedders: Option<bool>,
+    #[deserr(default)]
+    pub chat_completions: Option<bool>,
+    #[deserr(default)]
+    pub multimodal: Option<bool>,
 }
 
 impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
@@ -109,6 +115,8 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
             network,
             get_task_documents_route,
             composite_embedders,
+            chat_completions,
+            multimodal,
         } = value;
 
         Self {
@@ -119,6 +127,8 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
             network: Some(network),
             get_task_documents_route: Some(get_task_documents_route),
             composite_embedders: Some(composite_embedders),
+            chat_completions: Some(chat_completions),
+            multimodal: Some(multimodal),
         }
     }
 }
@@ -132,6 +142,8 @@ pub struct PatchExperimentalFeatureAnalytics {
     network: bool,
     get_task_documents_route: bool,
     composite_embedders: bool,
+    chat_completions: bool,
+    multimodal: bool,
 }
 
 impl Aggregate for PatchExperimentalFeatureAnalytics {
@@ -148,6 +160,8 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
             network: new.network,
             get_task_documents_route: new.get_task_documents_route,
             composite_embedders: new.composite_embedders,
+            chat_completions: new.chat_completions,
+            multimodal: new.multimodal,
         })
     }
 
@@ -173,6 +187,8 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
             network: Some(false),
             get_task_documents_route: Some(false),
             composite_embedders: Some(false),
+            chat_completions: Some(false),
+            multimodal: Some(false),
         })),
         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
             {
@@ -214,6 +230,8 @@ async fn patch_features(
             .0
             .composite_embedders
             .unwrap_or(old_features.composite_embedders),
+        chat_completions: new_features.0.chat_completions.unwrap_or(old_features.chat_completions),
+        multimodal: new_features.0.multimodal.unwrap_or(old_features.multimodal),
     };
 
     // explicitly destructure for analytics rather than using the `Serialize` implementation, because
@@ -227,6 +245,8 @@ async fn patch_features(
         network,
         get_task_documents_route,
         composite_embedders,
+        chat_completions,
+        multimodal,
     } = new_features;
 
     analytics.publish(
@@ -238,6 +258,8 @@ async fn patch_features(
             network,
             get_task_documents_route,
             composite_embedders,
+            chat_completions,
+            multimodal,
         },
         &req,
     );
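Both new flags are regular runtime-togglable features, so they are switched on the usual way (sketch; requires a master key, and omitted flags keep their current value):

```sh
# Enable the /chats routes and the `media` search parameter.
curl -X PATCH 'http://localhost:7700/experimental-features' \
  -H 'Authorization: Bearer MASTER_KEY' \
  -H 'Content-Type: application/json' \
  -d '{ "chatCompletions": true, "multimodal": true }'
```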
diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs
index 50eec46fe..a93d736f7 100644
--- a/crates/meilisearch/src/routes/indexes/documents.rs
+++ b/crates/meilisearch/src/routes/indexes/documents.rs
@@ -1452,7 +1452,6 @@ fn some_documents<'a, 't: 'a>(
 ) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
     let fields_ids_map = index.fields_ids_map(rtxn)?;
     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
-    let embedding_configs = index.embedding_configs(rtxn)?;
 
     Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
         ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
@@ -1468,15 +1467,9 @@ fn some_documents<'a, 't: 'a>(
                     Some(Value::Object(map)) => map,
                     _ => Default::default(),
                 };
-                for (name, vector) in index.embeddings(rtxn, key)? {
-                    let user_provided = embedding_configs
-                        .iter()
-                        .find(|conf| conf.name == name)
-                        .is_some_and(|conf| conf.user_provided.contains(key));
-                    let embeddings = ExplicitVectors {
-                        embeddings: Some(vector.into()),
-                        regenerate: !user_provided,
-                    };
+                for (name, (vector, regenerate)) in index.embeddings(rtxn, key)? {
+                    let embeddings =
+                        ExplicitVectors { embeddings: Some(vector.into()), regenerate };
                     vectors.insert(
                         name,
                         serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
diff --git a/crates/meilisearch/src/routes/indexes/facet_search.rs b/crates/meilisearch/src/routes/indexes/facet_search.rs
index 41f306746..18ad54ccf 100644
--- a/crates/meilisearch/src/routes/indexes/facet_search.rs
+++ b/crates/meilisearch/src/routes/indexes/facet_search.rs
@@ -56,6 +56,8 @@ pub struct FacetSearchQuery {
     pub q: Option<String>,
     #[deserr(default, error = DeserrJsonError)]
     pub vector: Option<Vec<f32>>,
+    #[deserr(default, error = DeserrJsonError)]
+    pub media: Option<serde_json::Value>,
     #[deserr(default, error = DeserrJsonError)]
     pub hybrid: Option<HybridQuery>,
     #[deserr(default, error = DeserrJsonError)]
@@ -94,6 +96,7 @@ impl FacetSearchAggregator {
             facet_name,
             vector,
             q,
+            media,
             filter,
             matching_strategy,
             attributes_to_search_on,
@@ -108,6 +111,7 @@ impl FacetSearchAggregator {
             facet_names: Some(facet_name.clone()).into_iter().collect(),
             additional_search_parameters_provided: q.is_some()
                 || vector.is_some()
+                || media.is_some()
                 || filter.is_some()
                 || *matching_strategy != MatchingStrategy::default()
                 || attributes_to_search_on.is_some()
@@ -291,6 +295,7 @@ impl From<FacetSearchQuery> for SearchQuery {
             facet_name: _,
             q,
             vector,
+            media,
             filter,
             matching_strategy,
             attributes_to_search_on,
@@ -312,6 +317,7 @@ impl From<FacetSearchQuery> for SearchQuery {
 
         SearchQuery {
             q,
+            media,
             offset: DEFAULT_SEARCH_OFFSET(),
             limit: DEFAULT_SEARCH_LIMIT(),
             page,
diff --git a/crates/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs
index 48ed1cfb1..04b3e12c4 100644
--- a/crates/meilisearch/src/routes/indexes/mod.rs
+++ b/crates/meilisearch/src/routes/indexes/mod.rs
@@ -172,7 +172,7 @@ pub async fn list_indexes(
     debug!(parameters = ?paginate, "List indexes");
     let filters = index_scheduler.filters();
     let (total, indexes) =
-        index_scheduler.get_paginated_indexes_stats(filters, *paginate.offset, *paginate.limit)?;
+        index_scheduler.paginated_indexes_stats(filters, *paginate.offset, *paginate.limit)?;
     let indexes = indexes
         .into_iter()
         .map(|(name, stats)| IndexView {
diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs
index 333ae1944..697ae9241 100644
--- a/crates/meilisearch/src/routes/indexes/search.rs
+++ b/crates/meilisearch/src/routes/indexes/search.rs
@@ -205,6 +205,8 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
 
         Ok(Self {
             q: other.q,
+            // `media` not supported for `GET`
+            media: None,
            vector: other.vector.map(CS::into_inner),
            offset: other.offset.0,
            limit: other.limit.0,
@@ -481,28 +483,30 @@ pub fn search_kind(
     index_uid: String,
     index: &milli::Index,
 ) -> Result<SearchKind, ResponseError> {
+    let is_placeholder_query =
+        if let Some(q) = query.q.as_deref() { q.trim().is_empty() } else { true };
+    let non_placeholder_query = !is_placeholder_query;
+    let is_media = query.media.is_some();
     // handle with care, the order of cases matters, the semantics is subtle
-    match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
-        // empty query, no vector => placeholder search
-        (Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
-        // no query, no vector => placeholder search
-        (None, _, None) => Ok(SearchKind::KeywordOnly),
-        // hybrid.semantic_ratio == 1.0 => vector
-        (_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
-            SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
-        }
-        // hybrid.semantic_ratio == 0.0 => keyword
-        (_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
+    match (is_media, non_placeholder_query, &query.hybrid, query.vector.as_deref()) {
+        // media + vector => error
+        (true, _, _, Some(_)) => Err(MeilisearchHttpError::MediaAndVector.into()),
+        // media + !hybrid => error
+        (true, _, None, _) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
+        // vector + !hybrid => error
+        (_, _, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
+        // hybrid S0 => keyword
+        (_, _, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
             Ok(SearchKind::KeywordOnly)
         }
-        // no query, hybrid, vector => semantic
-        (None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
-            SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len()))
+        // !q + !vector => placeholder search
+        (false, false, _, None) => Ok(SearchKind::KeywordOnly),
+        // hybrid S100 => semantic
+        (_, _, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
+            SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
         }
-        // query, no hybrid, no vector => keyword
-        (Some(_), None, None) => Ok(SearchKind::KeywordOnly),
-        // query, hybrid, maybe vector => hybrid
-        (Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
+        // q + hybrid => hybrid
+        (_, true, Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
             index_scheduler,
             index_uid,
             index,
@@ -510,7 +514,11 @@ pub fn search_kind(
             **semantic_ratio,
             v.map(|v| v.len()),
         ),
-
-        (_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
+        // !q + hybrid => semantic
+        (_, false, Some(HybridQuery { semantic_ratio: _, embedder }), v) => {
+            SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
+        }
+        // q => keyword
+        (false, true, None, None) => Ok(SearchKind::KeywordOnly),
     }
 }
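Reading the dispatch table with a concrete request: `media` always needs `hybrid` (second arm), and combining `media` with `vector` is rejected outright (first arm). A hypothetical multimodal query, assuming the `multimodal` feature is enabled and an embedder named `default` has fragments configured; the exact `media` shape depends on that fragment configuration:

```sh
curl -X POST 'http://localhost:7700/indexes/movies/search' \
  -H 'Authorization: Bearer MASTER_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "media": { "image": { "url": "https://example.com/poster.png" } },
    "hybrid": { "embedder": "default", "semanticRatio": 1.0 }
  }'
```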
diff --git a/crates/meilisearch/src/routes/indexes/search_analytics.rs b/crates/meilisearch/src/routes/indexes/search_analytics.rs
index b16e2636e..07f79eba7 100644
--- a/crates/meilisearch/src/routes/indexes/search_analytics.rs
+++ b/crates/meilisearch/src/routes/indexes/search_analytics.rs
@@ -61,6 +61,8 @@ pub struct SearchAggregator {
     semantic_ratio: bool,
     hybrid: bool,
     retrieve_vectors: bool,
+    // Number of requests containing `media`
+    total_media: usize,
 
     // every time a search is done, we increment the counter linked to the used settings
     matching_strategy: HashMap<String, usize>,
@@ -101,6 +103,7 @@ impl SearchAggregator {
         let SearchQuery {
             q,
             vector,
+            media,
             offset,
             limit,
             page,
@@ -175,6 +178,11 @@ impl SearchAggregator {
         if let Some(ref vector) = vector {
             ret.max_vector_size = vector.len();
         }
+
+        if media.is_some() {
+            ret.total_media = 1;
+        }
+
         ret.retrieve_vectors |= retrieve_vectors;
 
         if query.is_finite_pagination() {
@@ -277,6 +285,7 @@ impl Aggregate for SearchAggregator {
             show_ranking_score_details,
             semantic_ratio,
             hybrid,
+            total_media,
             total_degraded,
             total_used_negative_operator,
             ranking_score_threshold,
@@ -327,6 +336,7 @@ impl Aggregate for SearchAggregator {
         self.retrieve_vectors |= retrieve_vectors;
         self.semantic_ratio |= semantic_ratio;
         self.hybrid |= hybrid;
+        self.total_media += total_media;
 
         // pagination
         self.max_limit = self.max_limit.max(max_limit);
@@ -403,6 +413,7 @@ impl Aggregate for SearchAggregator {
             show_ranking_score_details,
             semantic_ratio,
             hybrid,
+            total_media,
             total_degraded,
             total_used_negative_operator,
             ranking_score_threshold,
@@ -450,6 +461,7 @@ impl Aggregate for SearchAggregator {
             "hybrid": {
                 "enabled": hybrid,
                 "semantic_ratio": semantic_ratio,
+                "total_media": total_media,
             },
             "pagination": {
                 "max_limit": max_limit,
diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs
index 92b018c8c..308977a6e 100644
--- a/crates/meilisearch/src/routes/indexes/settings.rs
+++ b/crates/meilisearch/src/routes/indexes/settings.rs
@@ -5,8 +5,9 @@ use index_scheduler::IndexScheduler;
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
+use meilisearch_types::milli::update::Setting;
 use meilisearch_types::settings::{
-    settings, SecretPolicy, SettingEmbeddingSettings, Settings, Unchecked,
+    settings, ChatSettings, SecretPolicy, SettingEmbeddingSettings, Settings, Unchecked,
 };
 use meilisearch_types::tasks::KindWithContent;
 use tracing::debug;
@@ -508,6 +509,17 @@ make_setting_routes!(
         camelcase_attr: "prefixSearch",
         analytics: PrefixSearchAnalytics
     },
+    {
+        route: "/chat",
+        update_verb: put,
+        value_type: ChatSettings,
+        err_type: meilisearch_types::deserr::DeserrJsonError<
+            meilisearch_types::error::deserr_codes::InvalidSettingsIndexChat,
+        >,
+        attr: chat,
+        camelcase_attr: "chat",
+        analytics: ChatAnalytics
+    },
 );
 
 #[utoipa::path(
@@ -597,6 +609,7 @@ pub async fn update_all(
             ),
             facet_search: FacetSearchAnalytics::new(new_settings.facet_search.as_ref().set()),
             prefix_search: PrefixSearchAnalytics::new(new_settings.prefix_search.as_ref().set()),
+            chat: ChatAnalytics::new(new_settings.chat.as_ref().set()),
         },
         &req,
     );
@@ -651,7 +664,11 @@ pub async fn get_all(
     let index = index_scheduler.index(&index_uid)?;
     let rtxn = index.read_txn()?;
-    let new_settings = settings(&index, &rtxn, SecretPolicy::HideSecrets)?;
+    let mut new_settings = settings(&index, &rtxn, SecretPolicy::HideSecrets)?;
+    if index_scheduler.features().check_chat_completions("showing index `chat` settings").is_err() {
+        new_settings.chat = Setting::NotSet;
+    }
+
     debug!(returns = ?new_settings, "Get all settings");
     Ok(HttpResponse::Ok().json(new_settings))
 }
@@ -738,8 +755,20 @@ fn validate_settings(
             if matches!(embedder.indexing_embedder, Setting::Set(_)) {
                 features.check_composite_embedders("setting `indexingEmbedder`")?;
             }
+
+            if matches!(embedder.indexing_fragments, Setting::Set(_)) {
+                features.check_multimodal("setting `indexingFragments`")?;
+            }
+
+            if matches!(embedder.search_fragments, Setting::Set(_)) {
+                features.check_multimodal("setting `searchFragments`")?;
+            }
         }
     }
 
+    if let Setting::Set(_chat) = &settings.chat {
+        features.check_chat_completions("setting `chat` in the index settings")?;
+    }
+
     Ok(settings.validate()?)
 }
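The `make_setting_routes!` entry above wires the standard verbs for the new sub-route: `GET`, `PUT` and `DELETE` on `/indexes/{uid}/settings/chat`, gated behind the `chatCompletions` feature. A sketch with a hypothetical payload (the exact `ChatSettings` fields are defined in `meilisearch-types`, not shown here):

```sh
curl -X PUT 'http://localhost:7700/indexes/movies/settings/chat' \
  -H 'Authorization: Bearer MASTER_KEY' \
  -H 'Content-Type: application/json' \
  -d '{ "description": "Movies with their genres and overviews" }'
```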
diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs
index cb5983f02..1b8d0e244 100644
--- a/crates/meilisearch/src/routes/indexes/settings_analytics.rs
+++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs
@@ -10,8 +10,8 @@ use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
 use meilisearch_types::milli::update::Setting;
 use meilisearch_types::milli::FilterableAttributesRule;
 use meilisearch_types::settings::{
-    FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView,
-    RankingRuleView, SettingEmbeddingSettings, TypoSettings,
+    ChatSettings, FacetingSettings, PaginationSettings, PrefixSearchSettings,
+    ProximityPrecisionView, RankingRuleView, SettingEmbeddingSettings, TypoSettings,
 };
 use serde::Serialize;
 
@@ -39,6 +39,7 @@ pub struct SettingsAnalytics {
     pub non_separator_tokens: NonSeparatorTokensAnalytics,
     pub facet_search: FacetSearchAnalytics,
     pub prefix_search: PrefixSearchAnalytics,
+    pub chat: ChatAnalytics,
 }
 
 impl Aggregate for SettingsAnalytics {
@@ -198,6 +199,7 @@ impl Aggregate for SettingsAnalytics {
                 set: new.prefix_search.set | self.prefix_search.set,
                 value: new.prefix_search.value.or(self.prefix_search.value),
             },
+            chat: ChatAnalytics { set: new.chat.set | self.chat.set },
         })
     }
 
@@ -454,7 +456,9 @@ pub struct PaginationAnalytics {
 
 impl PaginationAnalytics {
     pub fn new(setting: Option<&PaginationSettings>) -> Self {
-        Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) }
+        Self {
+            max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set().map(|x| x.into())),
+        }
     }
 
     pub fn into_settings(self) -> SettingsAnalytics {
@@ -674,3 +678,18 @@ impl PrefixSearchAnalytics {
         SettingsAnalytics { prefix_search: self, ..Default::default() }
     }
 }
+
+#[derive(Serialize, Default)]
+pub struct ChatAnalytics {
+    pub set: bool,
+}
+
+impl ChatAnalytics {
+    pub fn new(settings: Option<&ChatSettings>) -> Self {
+        Self { set: settings.is_some() }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { chat: self, ..Default::default() }
+    }
+}
diff --git a/crates/meilisearch/src/routes/mod.rs b/crates/meilisearch/src/routes/mod.rs
index 2c71fa68b..260d973a1 100644
--- a/crates/meilisearch/src/routes/mod.rs
+++ b/crates/meilisearch/src/routes/mod.rs
@@ -2,6 +2,7 @@ use std::collections::BTreeMap;
 
 use actix_web::web::Data;
 use actix_web::{web, HttpRequest, HttpResponse};
+use export::Export;
 use index_scheduler::IndexScheduler;
 use meilisearch_auth::AuthController;
 use meilisearch_types::batch_view::BatchView;
@@ -52,7 +53,10 @@ const PAGINATION_DEFAULT_LIMIT_FN: fn() -> usize = || 20;
 
 mod api_key;
 pub mod batches;
+pub mod chats;
 mod dump;
+mod export;
+mod export_analytics;
 pub mod features;
 pub mod indexes;
 mod logs;
@@ -83,6 +87,7 @@ mod tasks_test;
         (path = "/multi-search", api = multi_search::MultiSearchApi),
         (path = "/swap-indexes", api = swap_indexes::SwapIndexesApi),
         (path = "/experimental-features", api = features::ExperimentalFeaturesApi),
+        (path = "/export", api = export::ExportApi),
         (path = "/network", api = network::NetworkApi),
     ),
     paths(get_health, get_version, get_stats),
@@ -94,7 +99,7 @@ mod tasks_test;
         url = "/",
         description = "Local server",
     )),
-    components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures))
+    components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export))
 )]
 pub struct MeilisearchApi;
 
@@ -113,7 +118,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
         .service(web::scope("/swap-indexes").configure(swap_indexes::configure))
         .service(web::scope("/metrics").configure(metrics::configure))
         .service(web::scope("/experimental-features").configure(features::configure))
-        .service(web::scope("/network").configure(network::configure));
+        .service(web::scope("/network").configure(network::configure))
+        .service(web::scope("/export").configure(export::configure))
+        .service(web::scope("/chats").configure(chats::configure));
 
     #[cfg(feature = "swagger")]
     {
diff --git a/crates/meilisearch/src/routes/multi_search_analytics.rs b/crates/meilisearch/src/routes/multi_search_analytics.rs
index 3fa23f630..c24875797 100644
--- a/crates/meilisearch/src/routes/multi_search_analytics.rs
+++ b/crates/meilisearch/src/routes/multi_search_analytics.rs
@@ -42,6 +42,7 @@ impl MultiSearchAggregator {
                     federation_options,
                     q: _,
                     vector: _,
+                    media: _,
                     offset: _,
                     limit: _,
                     page: _,
diff --git a/crates/meilisearch/src/routes/tasks_test.rs b/crates/meilisearch/src/routes/tasks_test.rs
index a17b80c82..b09eb0fb3 100644
--- a/crates/meilisearch/src/routes/tasks_test.rs
+++ b/crates/meilisearch/src/routes/tasks_test.rs
@@ -228,7 +228,7 @@ mod tests {
         let err = deserr_query_params::(params).unwrap_err();
         snapshot!(meili_snap::json_string!(err), @r#"
         {
-          "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `upgradeDatabase`.",
+          "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
           "code": "invalid_task_types",
           "type": "invalid_request",
           "link": "https://docs.meilisearch.com/errors#invalid_task_types"
diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs
index 1dd16c474..1c987a70c 100644
--- a/crates/meilisearch/src/search/mod.rs
+++ b/crates/meilisearch/src/search/mod.rs
@@ -16,6 +16,7 @@ use meilisearch_types::error::{Code, ResponseError};
 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::locales::Locale;
+use meilisearch_types::milli::index::{self, SearchParameters};
 use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
 use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
 use meilisearch_types::milli::vector::Embedder;
@@ -63,6 +64,8 @@ pub struct SearchQuery {
     pub q: Option<String>,
     #[deserr(default, error = DeserrJsonError)]
     pub vector: Option<Vec<f32>>,
+    #[deserr(default, error = DeserrJsonError)]
+    pub media: Option<serde_json::Value>,
     #[deserr(default, error = DeserrJsonError)]
     pub hybrid: Option<HybridQuery>,
     #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)]
@@ -119,9 +122,59 @@ pub struct SearchQuery {
     pub locales: Option<Vec<Locale>>,
 }
 
+impl From<SearchParameters> for SearchQuery {
+    fn from(parameters: SearchParameters) -> Self {
+        let SearchParameters {
+            hybrid,
+            limit,
+            sort,
+            distinct,
+            matching_strategy,
+            attributes_to_search_on,
+            ranking_score_threshold,
+        } = parameters;
+
+        SearchQuery {
+            hybrid: hybrid.map(|index::HybridQuery { semantic_ratio, embedder }| HybridQuery {
+                semantic_ratio: SemanticRatio::try_from(semantic_ratio)
+                    .ok()
+                    .unwrap_or_else(DEFAULT_SEMANTIC_RATIO),
+                embedder,
+            }),
+            limit: limit.unwrap_or_else(DEFAULT_SEARCH_LIMIT),
+            sort,
+            distinct,
+            matching_strategy: matching_strategy.map(MatchingStrategy::from).unwrap_or_default(),
+            attributes_to_search_on,
+            ranking_score_threshold: ranking_score_threshold.map(RankingScoreThreshold::from),
+            q: None,
+            vector: None,
+            media: None,
+            offset: DEFAULT_SEARCH_OFFSET(),
+            page: None,
+            hits_per_page: None,
+            attributes_to_retrieve: None,
+            retrieve_vectors: false,
+            attributes_to_crop: None,
+            crop_length: DEFAULT_CROP_LENGTH(),
+            attributes_to_highlight: None,
+            show_matches_position: false,
+            show_ranking_score: false,
+            show_ranking_score_details: false,
+            filter: None,
+            facets: None,
+            highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
+            highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
+            crop_marker: DEFAULT_CROP_MARKER(),
+            locales: None,
+        }
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema, Serialize)]
 #[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
 pub struct RankingScoreThreshold(f64);
+
 impl std::convert::TryFrom<f64> for RankingScoreThreshold {
     type Error = InvalidSearchRankingScoreThreshold;
 
@@ -136,6 +189,14 @@ impl std::convert::TryFrom<f64> for RankingScoreThreshold {
     }
 }
 
+impl From<index::RankingScoreThreshold> for RankingScoreThreshold {
+    fn from(threshold: index::RankingScoreThreshold) -> Self {
+        let threshold = threshold.as_f64();
+        assert!((0.0..=1.0).contains(&threshold));
+        RankingScoreThreshold(threshold)
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Deserr)]
 #[deserr(try_from(f64) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
 pub struct RankingScoreThresholdSimilar(f64);
@@ -162,6 +223,7 @@ impl fmt::Debug for SearchQuery {
         let Self {
             q,
             vector,
+            media,
             hybrid,
             offset,
             limit,
@@ -216,6 +278,9 @@ impl fmt::Debug for SearchQuery {
                 );
             }
         }
+        if let Some(media) = media {
+            debug.field("media", media);
+        }
         if let Some(hybrid) = hybrid {
             debug.field("hybrid", &hybrid);
         }
@@ -279,8 +344,8 @@ impl fmt::Debug for SearchQuery {
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 pub struct HybridQuery {
-    #[deserr(default, error = DeserrJsonError, default)]
-    #[schema(value_type = f32, default)]
+    #[deserr(default, error = DeserrJsonError)]
+    #[schema(default, value_type = f32)]
     #[serde(default)]
     pub semantic_ratio: SemanticRatio,
     #[deserr(error = DeserrJsonError)]
@@ -341,10 +406,10 @@ impl SearchKind {
         route: Route,
     ) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
         let rtxn = index.read_txn()?;
-        let embedder_configs = index.embedding_configs(&rtxn)?;
+        let embedder_configs = index.embedding_configs().embedding_configs(&rtxn)?;
         let embedders = index_scheduler.embedders(index_uid, embedder_configs)?;
 
-        let (embedder, _, quantized) = embedders
+        let (embedder, quantized) = embedders
             .get(embedder_name)
             .ok_or(match route {
                 Route::Search | Route::MultiSearch => {
@@ -354,6 +419,7 @@ impl SearchKind {
                     milli::UserError::InvalidSimilarEmbedder(embedder_name.to_owned())
                 }
             })
+            .map(|runtime| (runtime.embedder.clone(), runtime.is_quantized))
            .map_err(milli::Error::from)?;
 
         if let Some(vector_len) = vector_len {
@@ -423,8 +489,10 @@ pub struct SearchQueryWithIndex {
     pub index_uid: IndexUid,
     #[deserr(default, error = DeserrJsonError)]
     pub q: Option<String>,
-    #[deserr(default, error = DeserrJsonError)]
+    #[deserr(default, error = DeserrJsonError)]
     pub vector: Option<Vec<f32>>,
+    #[deserr(default, error = DeserrJsonError)]
+    pub media: Option<serde_json::Value>,
     #[deserr(default, error = DeserrJsonError)]
     pub hybrid: Option<HybridQuery>,
     #[deserr(default, error = DeserrJsonError)]
@@ -505,6 +573,7 @@ impl SearchQueryWithIndex {
         let SearchQuery {
             q,
             vector,
+            media,
             hybrid,
             offset,
             limit,
@@ -535,6 +604,7 @@ impl SearchQueryWithIndex {
             index_uid,
             q,
             vector,
+            media,
             hybrid,
             offset: if offset == DEFAULT_SEARCH_OFFSET() { None } else { Some(offset) },
             limit: if limit == DEFAULT_SEARCH_LIMIT() { None } else { Some(limit) },
@@ -569,6 +639,7 @@ impl SearchQueryWithIndex {
             federation_options,
             q,
             vector,
+            media,
             offset,
             limit,
             page,
@@ -599,6 +670,7 @@ impl SearchQueryWithIndex {
         SearchQuery {
             q,
             vector,
+            media,
             offset: offset.unwrap_or(DEFAULT_SEARCH_OFFSET()),
             limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT()),
             page,
@@ -717,6 +789,16 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
     }
 }
 
+impl From<index::MatchingStrategy> for MatchingStrategy {
+    fn from(other: index::MatchingStrategy) -> Self {
+        match other {
+            index::MatchingStrategy::Last => Self::Last,
+            index::MatchingStrategy::All => Self::All,
+            index::MatchingStrategy::Frequency => Self::Frequency,
+        }
+    }
+}
+
 #[derive(Debug, Default, Clone, PartialEq, Eq, Deserr)]
 #[deserr(rename_all = camelCase)]
 pub enum FacetValuesSort {
@@ -882,7 +964,7 @@ pub fn add_search_rules(filter: &mut Option<Value>, rules: IndexSearchRules) {
     }
 }
 
-fn prepare_search<'t>(
+pub fn prepare_search<'t>(
     index: &'t Index,
     rtxn: &'t RoTxn,
     query: &'t SearchQuery,
@@ -890,6 +972,9 @@ fn prepare_search<'t>(
     time_budget: TimeBudget,
     features: RoFeatures,
 ) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> {
+    if query.media.is_some() {
+        features.check_multimodal("passing `media` in a search query")?;
+    }
     let mut search = index.search(rtxn);
     search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold { @@ -915,14 +1000,27 @@ fn prepare_search<'t>( let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10); + let q = query.q.as_deref(); + let media = query.media.as_ref(); + + let search_query = match (q, media) { + (Some(text), None) => milli::vector::SearchQuery::Text(text), + (q, media) => milli::vector::SearchQuery::Media { q, media }, + }; + embedder - .embed_search(query.q.as_ref().unwrap(), Some(deadline)) + .embed_search(search_query, Some(deadline)) .map_err(milli::vector::Error::from) .map_err(milli::Error::from)? } }; - - search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector)); + search.semantic( + embedder_name.clone(), + embedder.clone(), + *quantized, + Some(vector), + query.media.clone(), + ); } SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => { if let Some(q) = &query.q { @@ -934,6 +1032,7 @@ fn prepare_search<'t>( embedder.clone(), *quantized, query.vector.clone(), + query.media.clone(), ); } } @@ -1058,6 +1157,7 @@ pub fn perform_search( locales, // already used in prepare_search vector: _, + media: _, hybrid: _, offset: _, ranking_score_threshold: _, @@ -1260,7 +1360,6 @@ struct HitMaker<'a> { vectors_fid: Option<FieldId>, retrieve_vectors: RetrieveVectors, to_retrieve_ids: BTreeSet<FieldId>, - embedding_configs: Vec<milli::index::IndexEmbeddingConfig>, formatter_builder: MatcherBuilder<'a>, formatted_options: BTreeMap<FieldId, FormatOptions>, show_ranking_score: bool, @@ -1375,8 +1474,6 @@ impl<'a> HitMaker<'a> { &displayed_ids, ); - let embedding_configs = index.embedding_configs(rtxn)?; - Ok(Self { index, rtxn, @@ -1385,7 +1482,6 @@ impl<'a> HitMaker<'a> { vectors_fid, retrieve_vectors, to_retrieve_ids, - embedding_configs, formatter_builder, formatted_options, show_ranking_score: format.show_ranking_score, @@ -1431,14 +1527,8 @@ impl<'a> HitMaker<'a> { Some(Value::Object(map)) => map, _ => Default::default(), }; - for (name, vector) in self.index.embeddings(self.rtxn, id)? { - let user_provided = self - .embedding_configs - .iter() - .find(|conf| conf.name == name) - .is_some_and(|conf| conf.user_provided.contains(id)); - let embeddings = - ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided }; + for (name, (vector, regenerate)) in self.index.embeddings(self.rtxn, id)?
{ + let embeddings = ExplicitVectors { embeddings: Some(vector.into()), regenerate }; vectors.insert( name, serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?, diff --git a/crates/meilisearch/tests/auth/api_keys.rs b/crates/meilisearch/tests/auth/api_keys.rs index 7245c73a4..2688dd918 100644 --- a/crates/meilisearch/tests/auth/api_keys.rs +++ b/crates/meilisearch/tests/auth/api_keys.rs @@ -421,7 +421,7 @@ async fn error_add_api_key_invalid_parameters_actions() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###" { - "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`", + "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`", "code": "invalid_api_key_actions", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions" @@ -849,11 +849,27 @@ async fn list_api_keys() { "expiresAt": null, "createdAt": "[ignored]", "updatedAt": "[ignored]" + }, + { + "name": "Default Chat API Key", + "description": "Use it to chat and search from the frontend", + "key": "[ignored]", + "uid": "[ignored]", + "actions": [ + "chatCompletions", + "search" + ], + "indexes": [ + "*" + ], + "expiresAt": null, + "createdAt": "[ignored]", + "updatedAt": "[ignored]" } ], "offset": 0, "limit": 20, - "total": 3 + "total": 4 } "###); meili_snap::snapshot!(code, @"200 OK"); diff --git a/crates/meilisearch/tests/auth/errors.rs b/crates/meilisearch/tests/auth/errors.rs index 0e8968ef0..687cb67a0 100644 --- a/crates/meilisearch/tests/auth/errors.rs +++ b/crates/meilisearch/tests/auth/errors.rs @@ -93,7 +93,7 @@ async fn create_api_key_bad_actions() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, 
`keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`", + "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`", "code": "invalid_api_key_actions", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions" diff --git a/crates/meilisearch/tests/batches/errors.rs b/crates/meilisearch/tests/batches/errors.rs index 7f5fedb6a..bfc0d9251 100644 --- a/crates/meilisearch/tests/batches/errors.rs +++ b/crates/meilisearch/tests/batches/errors.rs @@ -42,7 +42,7 @@ async fn batch_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r#" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `upgradeDatabase`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. 
Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" diff --git a/crates/meilisearch/tests/batches/mod.rs b/crates/meilisearch/tests/batches/mod.rs index 268147d02..7a21f1eca 100644 --- a/crates/meilisearch/tests/batches/mod.rs +++ b/crates/meilisearch/tests/batches/mod.rs @@ -327,7 +327,7 @@ async fn test_summarized_document_addition_or_update() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" } "###); @@ -371,7 +371,7 @@ async fn test_summarized_document_addition_or_update() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" } "###); } @@ -420,7 +420,7 @@ async fn test_summarized_delete_documents_by_batch() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" } "###); @@ -463,7 +463,7 @@ async fn test_summarized_delete_documents_by_batch() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" } "###); } @@ -512,7 +512,7 @@ async fn test_summarized_delete_documents_by_filter() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" } "###); @@ -557,7 +557,7 @@ async fn test_summarized_delete_documents_by_filter() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" } "###); @@ -602,7 +602,7 @@ async fn test_summarized_delete_documents_by_filter() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" } "###); } @@ -648,7 +648,7 @@ async fn test_summarized_delete_document_by_id() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" } "###); @@ -691,7 +691,7 @@ async fn test_summarized_delete_document_by_id() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" } "###); } @@ -759,7 +759,7 @@ async fn test_summarized_settings_update() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" } "###); } @@ -1315,7 +1315,7 @@ async fn test_summarized_batch_deletion() { "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "a batch of tasks of type `taskDeletion` cannot be batched with any other type of task" + "batchStrategy": "stopped after the last task of type `taskDeletion` because 
they cannot be batched with tasks of any other type." } "###); } diff --git a/crates/meilisearch/tests/common/mod.rs b/crates/meilisearch/tests/common/mod.rs index d1da616ad..8b4d21039 100644 --- a/crates/meilisearch/tests/common/mod.rs +++ b/crates/meilisearch/tests/common/mod.rs @@ -278,7 +278,7 @@ pub async fn shared_index_with_score_documents() -> &'static Index<'static, Shared> { static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new(); INDEX.get_or_init(|| async { let server = Server::new_shared(); - let index = server._index("SCORE_DOCUMENTS").to_shared(); + let index = server._index("SHARED_SCORE_DOCUMENTS").to_shared(); let documents = SCORE_DOCUMENTS.clone(); let (response, _code) = index._add_documents(documents, None).await; index.wait_task(response.uid()).await.succeeded(); diff --git a/crates/meilisearch/tests/common/server.rs b/crates/meilisearch/tests/common/server.rs index 431972983..4367650c5 100644 --- a/crates/meilisearch/tests/common/server.rs +++ b/crates/meilisearch/tests/common/server.rs @@ -347,6 +347,16 @@ impl Server { } } + pub fn unique_index_with_prefix(&self, prefix: &str) -> Index<'_> { + let uuid = Uuid::new_v4(); + Index { + uid: format!("{prefix}-{}", uuid), + service: &self.service, + encoder: Encoder::Plain, + marker: PhantomData, + } + } + pub fn unique_index_with_encoder(&self, encoder: Encoder) -> Index<'_> { let uuid = Uuid::new_v4(); Index { uid: uuid.to_string(), service: &self.service, encoder, marker: PhantomData } @@ -454,6 +464,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt { skip_index_budget: true, // Having 2 threads makes the tests way faster max_indexing_threads: MaxThreads::from_str("2").unwrap(), + experimental_no_edition_2024_for_settings: false, }, experimental_enable_metrics: false, ..Parser::parse_from(None as Option<&str>) diff --git a/crates/meilisearch/tests/documents/add_documents.rs b/crates/meilisearch/tests/documents/add_documents.rs index 1cf492fc0..b69d289e1 100644 --- a/crates/meilisearch/tests/documents/add_documents.rs +++ b/crates/meilisearch/tests/documents/add_documents.rs @@ -293,7 +293,7 @@ async fn add_csv_document() { "enqueuedAt": "[date]" } "#); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": "[uid]", @@ -358,7 +358,7 @@ async fn add_csv_document_with_types() { "enqueuedAt": "[date]" } "#); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": "[uid]", @@ -434,7 +434,7 @@ async fn add_csv_document_with_custom_delimiter() { "enqueuedAt": "[date]" } "#); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": "[uid]", @@ -991,7 +991,7 @@ async fn add_documents_no_index_creation() { let (response, code) = index.add_documents(documents, None).await;
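Two helpers from the hunks above drive most of the mechanical churn in the remaining test files: `unique_index_with_prefix` yields a recognizable but collision-free uid, and task polling moves from the index handle to the shared server handle. A condensed sketch of the resulting pattern, using only helpers visible in this diff (the `movies` prefix and the document body are arbitrary examples):

    let server = Server::new_shared();
    // uid becomes "movies-<uuid>", so shared-server tests stay readable
    // while still avoiding collisions between concurrently running tests.
    let index = server.unique_index_with_prefix("movies");
    let (task, code) = index.add_documents(json!([{ "id": 1, "title": "Pan" }]), None).await;
    assert_eq!(code, 202);
    // Polling on the server rather than the index is the migration
    // applied throughout the test hunks below.
    server.wait_task(task.uid()).await.succeeded();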
snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(code, @"202 Accepted"); snapshot!(response, @r###" @@ -1068,7 +1068,7 @@ async fn document_addition_with_primary_key() { } "#); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index.get_task(response.uid()).await; snapshot!(code, @"200 OK"); @@ -1120,7 +1120,7 @@ async fn document_addition_with_huge_int_primary_key() { let (response, code) = index.add_documents(documents, Some("primary")).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(response, @r###" { @@ -1178,7 +1178,7 @@ async fn replace_document() { } "#); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -1190,7 +1190,7 @@ async fn replace_document() { let (task, code) = index.add_documents(documents, None).await; snapshot!(code,@"202 Accepted"); - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); @@ -1362,7 +1362,7 @@ async fn error_add_documents_bad_document_id() { } ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -1399,7 +1399,7 @@ async fn error_add_documents_bad_document_id() { } ]); let (value, _code) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.failed(); + server.wait_task(value.uid()).await.failed(); let (response, code) = index.get_task(value.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -1436,7 +1436,7 @@ async fn error_add_documents_bad_document_id() { } ]); let (value, _code) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.failed(); + server.wait_task(value.uid()).await.failed(); let (response, code) = index.get_task(value.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -1478,7 +1478,7 @@ async fn error_add_documents_missing_document_id() { } ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -1527,7 +1527,7 @@ async fn 
error_document_field_limit_reached_in_one_document() { let (response, code) = index.update_documents(documents, Some("id")).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.failed(); + let response = server.wait_task(response.uid()).await.failed(); snapshot!(code, @"202 Accepted"); // Documents without a primary key are not accepted. snapshot!(response, @@ -1576,7 +1576,7 @@ async fn error_document_field_limit_reached_over_multiple_documents() { let (response, code) = index.update_documents(documents, Some("id")).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(code, @"202 Accepted"); snapshot!(response, @r###" @@ -1611,7 +1611,7 @@ async fn error_document_field_limit_reached_over_multiple_documents() { let (response, code) = index.update_documents(documents, Some("id")).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.failed(); + let response = server.wait_task(response.uid()).await.failed(); snapshot!(code, @"202 Accepted"); snapshot!(response, @r###" @@ -1660,7 +1660,7 @@ async fn error_document_field_limit_reached_in_one_nested_document() { let (response, code) = index.update_documents(documents, Some("id")).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(code, @"202 Accepted"); // Documents without a primary key are not accepted. snapshot!(response, @@ -1705,7 +1705,7 @@ async fn error_document_field_limit_reached_over_multiple_documents_with_nested_ let (response, code) = index.update_documents(documents, Some("id")).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(code, @"202 Accepted"); snapshot!(response, @r###" @@ -1741,7 +1741,7 @@ async fn error_document_field_limit_reached_over_multiple_documents_with_nested_ let (response, code) = index.update_documents(documents, Some("id")).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(code, @"202 Accepted"); snapshot!(response, @r###" @@ -1790,7 +1790,7 @@ async fn add_documents_with_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - let response = index.wait_task(task.uid()).await.succeeded(); + let response = server.wait_task(task.uid()).await.succeeded(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r#" { @@ -1914,7 +1914,7 @@ async fn update_documents_with_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - let response = index.wait_task(task.uid()).await.succeeded(); + let response = server.wait_task(task.uid()).await.succeeded(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r#" { @@ -1983,7 +1983,7 @@ async fn update_documents_with_geo_field() { } ]); let (task, _status_code) = 
index.update_documents(updated_documents, None).await; - let response = index.wait_task(task.uid()).await.succeeded(); + let response = server.wait_task(task.uid()).await.succeeded(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" { @@ -2097,7 +2097,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".indexUid" => "[uuid]" }), @@ -2135,7 +2135,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2173,7 +2173,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2211,7 +2211,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2249,7 +2249,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2287,7 +2287,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2325,7 +2325,7 @@ async fn 
add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2363,7 +2363,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2401,7 +2401,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2439,7 +2439,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2477,7 +2477,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2515,7 +2515,7 @@ async fn add_documents_invalid_geo_field() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2556,7 +2556,7 @@ async fn add_documents_invalid_geo_field() { let (response, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.failed(); + let response = server.wait_task(response.uid()).await.failed(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => 
"[date]" }), @r###" { @@ -2593,7 +2593,7 @@ async fn add_documents_invalid_geo_field() { let (response, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.failed(); + let response = server.wait_task(response.uid()).await.failed(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" { @@ -2630,7 +2630,7 @@ async fn add_documents_invalid_geo_field() { let (response, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await.failed(); + let response = server.wait_task(response.uid()).await.failed(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" { @@ -2674,7 +2674,7 @@ async fn add_invalid_geo_and_then_settings() { ]); let (ret, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - let ret = index.wait_task(ret.uid()).await.succeeded(); + let ret = server.wait_task(ret.uid()).await.succeeded(); snapshot!(ret, @r###" { "uid": "[uid]", @@ -2697,7 +2697,7 @@ async fn add_invalid_geo_and_then_settings() { let (ret, code) = index.update_settings(json!({ "sortableAttributes": ["_geo"] })).await; snapshot!(code, @"202 Accepted"); - let ret = index.wait_task(ret.uid()).await.failed(); + let ret = server.wait_task(ret.uid()).await.failed(); snapshot!(ret, @r###" { "uid": "[uid]", @@ -2765,7 +2765,7 @@ async fn error_primary_key_inference() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; assert_eq!(code, 200); @@ -2806,7 +2806,7 @@ async fn error_primary_key_inference() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let (response, code) = index.get_task(task.uid()).await; assert_eq!(code, 200); @@ -2845,7 +2845,7 @@ async fn error_primary_key_inference() { ]); let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_task(task.uid()).await; assert_eq!(code, 200); @@ -2884,12 +2884,12 @@ async fn add_documents_with_primary_key_twice() { ]); let (task, _status_code) = index.add_documents(documents.clone(), Some("title")).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, _code) = index.get_task(task.uid()).await; assert_eq!(response["status"], "succeeded"); let (task, _status_code) = index.add_documents(documents, Some("title")).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, _code) = index.get_task(task.uid()).await; assert_eq!(response["status"], "succeeded"); } @@ -2922,7 +2922,7 @@ async fn batch_several_documents_addition() { // wait first batch of documents to finish let finished_tasks = futures::future::join_all(waiter).await; for (task, _code) in finished_tasks { - index.wait_task(task.uid()).await; + 
server.wait_task(task.uid()).await; } // run a second completely failing batch @@ -2936,7 +2936,7 @@ async fn batch_several_documents_addition() { // wait second batch of documents to finish let finished_tasks = futures::future::join_all(waiter).await; for (task, _code) in finished_tasks { - index.wait_task(task.uid()).await; + server.wait_task(task.uid()).await; } let (response, _code) = index.filtered_tasks(&[], &["failed"], &[]).await; diff --git a/crates/meilisearch/tests/documents/delete_documents.rs b/crates/meilisearch/tests/documents/delete_documents.rs index 5ea122bd0..9c367cb51 100644 --- a/crates/meilisearch/tests/documents/delete_documents.rs +++ b/crates/meilisearch/tests/documents/delete_documents.rs @@ -5,11 +5,12 @@ use crate::json; #[actix_rt::test] async fn delete_one_document_unexisting_index() { + let server = Server::new_shared(); let index = shared_does_not_exists_index().await; let (task, code) = index.delete_document_by_filter_fail(json!({"filter": "a = b"})).await; assert_eq!(code, 202); - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); } #[actix_rt::test] @@ -19,7 +20,7 @@ async fn delete_one_unexisting_document() { index.create(None).await; let (response, code) = index.delete_document(0).await; assert_eq!(code, 202, "{response}"); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); } #[actix_rt::test] @@ -28,10 +29,10 @@ async fn delete_one_document() { let index = server.unique_index(); let (task, _status_code) = index.add_documents(json!([{ "id": 0, "content": "foobar" }]), None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (task, status_code) = index.delete_document(0).await; assert_eq!(status_code, 202); - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (_response, code) = index.get_document(0, None).await; assert_eq!(code, 404); @@ -44,7 +45,7 @@ async fn clear_all_documents_unexisting_index() { let (task, code) = index.clear_all_documents().await; assert_eq!(code, 202); - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); } #[actix_rt::test] @@ -57,11 +58,11 @@ async fn clear_all_documents() { None, ) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (task, code) = index.clear_all_documents().await; assert_eq!(code, 202); - let _update = index.wait_task(task.uid()).await.succeeded(); + let _update = server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert!(response["results"].as_array().unwrap().is_empty()); @@ -72,11 +73,11 @@ async fn clear_all_documents_empty_index() { let server = Server::new_shared(); let index = server.unique_index(); let (task, _status_code) = index.create(None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (task, code) = index.clear_all_documents().await; assert_eq!(code, 202); - let _update = index.wait_task(task.uid()).await.succeeded(); + let _update = server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert!(response["results"].as_array().unwrap().is_empty()); @@ -95,7 +96,7 @@ async fn error_delete_batch_unexisting_index() { }); 
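For orientation, the delete-by-filter flow that the surrounding tests exercise reduces to the shape below; the filter string is an arbitrary example and assumes `color` was made filterable beforehand:

    let (task, code) = index
        .delete_document_by_filter(json!({ "filter": "color = blue" }))
        .await;
    assert_eq!(code, 202);
    let response = server.wait_task(task.uid()).await.succeeded();
    // Delete-by-filter tasks are reported as documentDeletion tasks,
    // with the number of removed documents in the task details.
    assert_eq!(response["type"], "documentDeletion");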
assert_eq!(code, 202); - let response = index.wait_task(task.uid()).await.failed(); + let response = server.wait_task(task.uid()).await.failed(); assert_eq!(response["error"], expected_response); } @@ -104,11 +105,11 @@ async fn delete_batch() { let server = Server::new_shared(); let index = server.unique_index(); let (task,_status_code) = index.add_documents(json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }, { "id": 3, "content": "foobar" }]), Some("id")).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (task, code) = index.delete_batch(vec![1, 0]).await; assert_eq!(code, 202); - let _update = index.wait_task(task.uid()).await.succeeded(); + let _update = server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 1); @@ -120,11 +121,11 @@ async fn delete_no_document_batch() { let server = Server::new_shared(); let index = server.unique_index(); let (task,_status_code) = index.add_documents(json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }, { "id": 3, "content": "foobar" }]), Some("id")).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.delete_batch(vec![]).await; assert_eq!(code, 202, "{response}"); - let _update = index.wait_task(response.uid()).await.succeeded(); + let _update = server.wait_task(response.uid()).await.succeeded(); let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 3); @@ -146,7 +147,7 @@ async fn delete_document_by_filter() { Some("id"), ) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (stats, _) = index.stats().await; snapshot!(json_string!(stats, { @@ -180,7 +181,7 @@ async fn delete_document_by_filter() { } "###); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": "[uid]", @@ -253,7 +254,7 @@ async fn delete_document_by_filter() { } "###); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": "[uid]", @@ -328,7 +329,7 @@ async fn delete_document_by_complex_filter() { Some("id"), ) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index .delete_document_by_filter( json!({ "filter": ["color != red", "color != green", "color EXISTS"] }), @@ -345,7 +346,7 @@ async fn delete_document_by_complex_filter() { } "###); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", 
".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": "[uid]", @@ -404,7 +405,7 @@ async fn delete_document_by_complex_filter() { } "###); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": "[uid]", diff --git a/crates/meilisearch/tests/documents/errors.rs b/crates/meilisearch/tests/documents/errors.rs index afca9498b..506be97d5 100644 --- a/crates/meilisearch/tests/documents/errors.rs +++ b/crates/meilisearch/tests/documents/errors.rs @@ -621,7 +621,7 @@ async fn delete_document_by_filter() { let (response, code) = index.delete_document_by_filter_fail(json!({ "filter": "catto = jorts"})).await; snapshot!(code, @"202 Accepted"); - let response = server.wait_task(response.uid()).await; + let response = server.wait_task(response.uid()).await.failed(); snapshot!(response, @r###" { "uid": "[uid]", @@ -665,7 +665,7 @@ async fn fetch_document_by_filter() { Some("id"), ) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.fetch_documents(json!(null)).await; snapshot!(code, @"400 Bad Request"); diff --git a/crates/meilisearch/tests/documents/get_documents.rs b/crates/meilisearch/tests/documents/get_documents.rs index 4f82faf99..63dc224c2 100644 --- a/crates/meilisearch/tests/documents/get_documents.rs +++ b/crates/meilisearch/tests/documents/get_documents.rs @@ -23,7 +23,7 @@ async fn error_get_unexisting_document() { let server = Server::new_shared(); let index = server.unique_index(); let (task, _code) = index.create(None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_document(1, None).await; @@ -43,7 +43,7 @@ async fn get_document() { let server = Server::new_shared(); let index = server.unique_index(); let (task, _code) = index.create(None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let documents = json!([ { "id": 0, @@ -52,7 +52,7 @@ async fn get_document() { ]); let (task, code) = index.add_documents(documents, None).await; assert_eq!(code, 202); - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_document(0, None).await; assert_eq!(code, 200); assert_eq!( @@ -276,7 +276,7 @@ async fn get_document_s_nested_attributes_to_retrieve() { let server = Server::new_shared(); let index = server.unique_index(); let (task, _code) = index.create(None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let documents = json!([ { @@ -293,7 +293,7 @@ async fn get_document_s_nested_attributes_to_retrieve() { ]); let (task, code) = index.add_documents(documents, None).await; assert_eq!(code, 202); - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_document(0, Some(json!({ "fields": ["content"] }))).await; assert_eq!(code, 200); @@ -369,7 +369,7 @@ async fn get_document_by_filter() { Some("id"), ) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = 
index.fetch_documents(json!({})).await; let (response2, code2) = index.get_all_documents_raw("").await; @@ -525,7 +525,7 @@ async fn get_document_by_ids() { Some("id"), ) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index .fetch_documents(json!({ @@ -651,7 +651,7 @@ async fn get_document_invalid_ids() { Some("id"), ) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.fetch_documents(json!({"ids": ["0", "illegal/docid"] })).await; let (response2, code2) = index.get_all_documents_raw("?ids=0,illegal/docid").await; @@ -683,7 +683,7 @@ async fn get_document_not_found_ids() { Some("id"), ) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.fetch_documents(json!({"ids": ["0", 3, 42] })).await; let (response2, code2) = index.get_all_documents_raw("?ids=0,3,42").await; @@ -726,7 +726,7 @@ async fn get_document_by_ids_and_filter() { Some("id"), ) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.fetch_documents(json!({"ids": [2], "filter": "color = blue" })).await; @@ -854,7 +854,7 @@ async fn get_document_with_vectors() { ]); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); // by default you shouldn't see the `_vectors` object let (documents, _code) = index.get_all_documents(Default::default()).await; diff --git a/crates/meilisearch/tests/documents/update_documents.rs b/crates/meilisearch/tests/documents/update_documents.rs index aaf529ce5..b74d91506 100644 --- a/crates/meilisearch/tests/documents/update_documents.rs +++ b/crates/meilisearch/tests/documents/update_documents.rs @@ -6,19 +6,18 @@ use crate::json; #[actix_rt::test] async fn error_document_update_create_index_bad_uid() { - let server = Server::new().await; - let index = server.index("883 fj!"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("883 fj!"); let (response, code) = index.update_documents(json!([{"id": 1}]), None).await; - let expected_response = json!({ - "message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", - "code": "invalid_index_uid", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_index_uid" - }); - - assert_eq!(code, 400); - assert_eq!(response, expected_response); + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "`883 fj!-[uuid]` is not a valid index uid. 
Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", + "code": "invalid_index_uid", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_index_uid" + }"###); } #[actix_rt::test] @@ -35,7 +34,7 @@ async fn document_update_with_primary_key() { let (response, code) = index.update_documents(documents, Some("primary")).await; assert_eq!(code, 202); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index.get_task(response.uid()).await; assert_eq!(code, 200); @@ -64,7 +63,7 @@ async fn update_document() { let (response, code) = index.add_documents(documents, None).await; assert_eq!(code, 202); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -76,7 +75,7 @@ async fn update_document() { let (response, code) = index.update_documents(documents, None).await; assert_eq!(code, 202, "response: {}", response); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index.get_task(response.uid()).await; assert_eq!(code, 200); @@ -108,7 +107,7 @@ async fn update_document_gzip_encoded() { let (response, code) = index.add_documents(documents, None).await; assert_eq!(code, 202); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -120,7 +119,7 @@ async fn update_document_gzip_encoded() { let (response, code) = index.update_documents(documents, None).await; assert_eq!(code, 202, "response: {}", response); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index.get_task(response.uid()).await; assert_eq!(code, 200); @@ -143,7 +142,7 @@ async fn update_larger_dataset() { let index = server.unique_index(); let documents = serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(); let (task, _code) = index.update_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_task(task.uid()).await; assert_eq!(code, 200); assert_eq!(response["type"], "documentAdditionOrUpdate"); @@ -167,7 +166,7 @@ async fn error_update_documents_bad_document_id() { } ]); let (task, _code) = index.update_documents(documents, None).await; - let response = index.wait_task(task.uid()).await; + let response = server.wait_task(task.uid()).await; assert_eq!(response["status"], json!("failed")); assert_eq!( response["error"]["message"], @@ -195,7 +194,7 @@ async fn error_update_documents_missing_document_id() { } ]); let (task, _code) = index.update_documents(documents, None).await; - let response = index.wait_task(task.uid()).await; + let response = server.wait_task(task.uid()).await; assert_eq!(response["status"], "failed"); assert_eq!( response["error"]["message"], @@ -220,7 +219,7 @@ async fn update_faceted_document() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let documents: Vec<_> = (0..1000) .map(|id| { @@ -234,7 +233,7 @@ async fn update_faceted_document() { let (response, code) = index.add_documents(documents.into(), None).await; assert_eq!(code, 202); - 
index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -246,7 +245,7 @@ async fn update_faceted_document() { let (response, code) = index.update_documents(documents, None).await; assert_eq!(code, 202, "response: {}", response); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); index .search(json!({"limit": 10}), |response, code| { diff --git a/crates/meilisearch/tests/dumps/mod.rs b/crates/meilisearch/tests/dumps/mod.rs index 3ba3c20eb..9b111186d 100644 --- a/crates/meilisearch/tests/dumps/mod.rs +++ b/crates/meilisearch/tests/dumps/mod.rs @@ -2187,7 +2187,9 @@ async fn import_dump_v6_containing_experimental_features() { "containsFilter": false, "network": false, "getTaskDocumentsRoute": false, - "compositeEmbedders": false + "compositeEmbedders": false, + "chatCompletions": false, + "multimodal": false } "###); @@ -2312,7 +2314,9 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() { "containsFilter": false, "network": false, "getTaskDocumentsRoute": false, - "compositeEmbedders": false + "compositeEmbedders": false, + "chatCompletions": false, + "multimodal": false } "###); @@ -2417,7 +2421,9 @@ async fn generate_and_import_dump_containing_vectors() { "containsFilter": false, "network": false, "getTaskDocumentsRoute": false, - "compositeEmbedders": false + "compositeEmbedders": false, + "chatCompletions": false, + "multimodal": false } "###); diff --git a/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v6_containing_batches_and_enqueued_tasks/batches.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v6_containing_batches_and_enqueued_tasks/batches.snap index 2f3b0a7f9..81d3a1981 100644 --- a/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v6_containing_batches_and_enqueued_tasks/batches.snap +++ b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v6_containing_batches_and_enqueued_tasks/batches.snap @@ -27,7 +27,7 @@ source: crates/meilisearch/tests/dumps/mod.rs "duration": "[date]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "batched all enqueued tasks" + "batchStrategy": "batched all enqueued tasks" }, { "uid": 1, @@ -51,7 +51,7 @@ source: crates/meilisearch/tests/dumps/mod.rs "duration": "PT0.144827890S", "startedAt": "2025-02-04T10:15:21.275640274Z", "finishedAt": "2025-02-04T10:15:21.420468164Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 0, @@ -72,7 +72,7 @@ source: crates/meilisearch/tests/dumps/mod.rs "duration": "PT0.032902186S", "startedAt": "2025-02-04T10:14:43.559526162Z", "finishedAt": "2025-02-04T10:14:43.592428348Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 3, diff --git a/crates/meilisearch/tests/features/mod.rs b/crates/meilisearch/tests/features/mod.rs index 34cd40e38..ec5838d35 100644 --- a/crates/meilisearch/tests/features/mod.rs +++ b/crates/meilisearch/tests/features/mod.rs @@ -24,7 +24,9 @@ async fn experimental_features() { "containsFilter": false, "network": false, "getTaskDocumentsRoute": false, - "compositeEmbedders": false + "compositeEmbedders": false, + "chatCompletions": false, + "multimodal": false } "###); @@ -39,7 +41,9 @@ async fn experimental_features() { "containsFilter": false, "network": false, "getTaskDocumentsRoute": false, - "compositeEmbedders": false + "compositeEmbedders": false, + "chatCompletions": false, + "multimodal": 
false } "###); @@ -54,7 +58,9 @@ async fn experimental_features() { "containsFilter": false, "network": false, "getTaskDocumentsRoute": false, - "compositeEmbedders": false + "compositeEmbedders": false, + "chatCompletions": false, + "multimodal": false } "###); @@ -70,7 +76,9 @@ async fn experimental_features() { "containsFilter": false, "network": false, "getTaskDocumentsRoute": false, - "compositeEmbedders": false + "compositeEmbedders": false, + "chatCompletions": false, + "multimodal": false } "###); @@ -86,7 +94,9 @@ async fn experimental_features() { "containsFilter": false, "network": false, "getTaskDocumentsRoute": false, - "compositeEmbedders": false + "compositeEmbedders": false, + "chatCompletions": false, + "multimodal": false } "###); } @@ -109,7 +119,9 @@ async fn experimental_feature_metrics() { "containsFilter": false, "network": false, "getTaskDocumentsRoute": false, - "compositeEmbedders": false + "compositeEmbedders": false, + "chatCompletions": false, + "multimodal": false } "###); @@ -156,7 +168,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`", + "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`, `chatCompletions`, `multimodal`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" diff --git a/crates/meilisearch/tests/index/create_index.rs b/crates/meilisearch/tests/index/create_index.rs index e8efd14e2..dc178919e 100644 --- a/crates/meilisearch/tests/index/create_index.rs +++ b/crates/meilisearch/tests/index/create_index.rs @@ -17,7 +17,7 @@ async fn create_index_no_primary_key() { assert_eq!(response["status"], "enqueued"); - let response = index.wait_task(response.uid()).await; + let response = server.wait_task(response.uid()).await; assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"], "indexCreation"); @@ -34,7 +34,7 @@ async fn create_index_with_gzip_encoded_request() { assert_eq!(response["status"], "enqueued"); - let response = index.wait_task(response.uid()).await; + let response = server.wait_task(response.uid()).await; assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"], "indexCreation"); @@ -46,8 +46,10 @@ async fn create_index_with_gzip_encoded_request_and_receiving_brotli_encoded_res let server = Server::new_shared(); let app = server.init_web_app().await; + let index = server.unique_index_with_prefix("test"); + let body = serde_json::to_string(&json!({ - "uid": "test", + "uid": index.uid.clone(), "primaryKey": None::<&str>, })) .unwrap(); @@ -68,7 +70,7 @@ let parsed_response = serde_json::from_slice::<Value>(decoded.into().as_ref()).expect("Expecting valid json"); - assert_eq!(parsed_response["indexUid"], "test"); + assert_eq!(parsed_response["indexUid"], index.uid); } #[actix_rt::test] @@ -81,7 +83,7 @@ async fn create_index_with_zlib_encoded_request() { assert_eq!(response["status"], "enqueued"); - let response = index.wait_task(response.uid()).await; + let response = server.wait_task(response.uid()).await; assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"],
"indexCreation"); @@ -98,7 +100,7 @@ async fn create_index_with_brotli_encoded_request() { assert_eq!(response["status"], "enqueued"); - let response = index.wait_task(response.uid()).await; + let response = server.wait_task(response.uid()).await; assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"], "indexCreation"); @@ -115,7 +117,7 @@ async fn create_index_with_primary_key() { assert_eq!(response["status"], "enqueued"); - let response = index.wait_task(response.uid()).await.succeeded(); + let response = server.wait_task(response.uid()).await.succeeded(); assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"], "indexCreation"); @@ -130,7 +132,7 @@ async fn create_index_with_invalid_primary_key() { let index = server.unique_index(); let (response, code) = index.add_documents(documents, Some("title")).await; assert_eq!(code, 202); - index.wait_task(response.uid()).await.failed(); + server.wait_task(response.uid()).await.failed(); let (response, code) = index.get().await; assert_eq!(code, 200); @@ -140,7 +142,7 @@ async fn create_index_with_invalid_primary_key() { let (response, code) = index.add_documents(documents, Some("id")).await; assert_eq!(code, 202); - index.wait_task(response.uid()).await.failed(); + server.wait_task(response.uid()).await.failed(); let (response, code) = index.get().await; assert_eq!(code, 200); @@ -179,7 +181,7 @@ async fn error_create_existing_index() { let (task, _) = index.create(Some("primary")).await; - let response = index.wait_task(task.uid()).await; + let response = server.wait_task(task.uid()).await; let msg = format!( "Index `{}` already exists.", task["indexUid"].as_str().expect("indexUid should exist").trim_matches('"') diff --git a/crates/meilisearch/tests/index/delete_index.rs b/crates/meilisearch/tests/index/delete_index.rs index 713891420..085b47294 100644 --- a/crates/meilisearch/tests/index/delete_index.rs +++ b/crates/meilisearch/tests/index/delete_index.rs @@ -9,7 +9,7 @@ async fn create_and_delete_index() { assert_eq!(code, 202); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); assert_eq!(index.get().await.1, 200); @@ -17,18 +17,19 @@ async fn create_and_delete_index() { assert_eq!(code, 202); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); assert_eq!(index.get().await.1, 404); } #[actix_rt::test] async fn error_delete_unexisting_index() { + let server = Server::new_shared(); let index = shared_does_not_exists_index().await; let (task, code) = index.delete_index_fail().await; assert_eq!(code, 202); - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); let expected_response = json!({ "message": "Index `DOES_NOT_EXISTS` not found.", @@ -37,7 +38,7 @@ async fn error_delete_unexisting_index() { "link": "https://docs.meilisearch.com/errors#index_not_found" }); - let response = index.wait_task(task.uid()).await; + let response = server.wait_task(task.uid()).await; assert_eq!(response["status"], "failed"); assert_eq!(response["error"], expected_response); } @@ -58,7 +59,7 @@ async fn loop_delete_add_documents() { } for task in tasks { - let response = index.wait_task(task).await.succeeded(); + let response = server.wait_task(task).await.succeeded(); assert_eq!(response["status"], "succeeded", "{}", response); } } diff --git a/crates/meilisearch/tests/index/get_index.rs b/crates/meilisearch/tests/index/get_index.rs index b26eaeb9a..ece479513 100644 --- 
+++ b/crates/meilisearch/tests/index/get_index.rs
@@ -1,8 +1,8 @@
-use crate::json;
 use meili_snap::{json_string, snapshot};
 use serde_json::Value;
 
 use crate::common::{shared_does_not_exists_index, Server};
+use crate::json;
 
 #[actix_rt::test]
 async fn create_and_get_index() {
@@ -12,7 +12,7 @@ async fn create_and_get_index() {
 
     assert_eq!(code, 202);
 
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index.get().await;
 
diff --git a/crates/meilisearch/tests/index/stats.rs b/crates/meilisearch/tests/index/stats.rs
index 90c77cec8..610601318 100644
--- a/crates/meilisearch/tests/index/stats.rs
+++ b/crates/meilisearch/tests/index/stats.rs
@@ -10,7 +10,7 @@ async fn stats() {
 
     assert_eq!(code, 202);
 
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) = index.stats().await;
 
@@ -33,7 +33,7 @@ async fn stats() {
     let (response, code) = index.add_documents(documents, None).await;
     assert_eq!(code, 202);
 
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index.stats().await;
 
diff --git a/crates/meilisearch/tests/index/update_index.rs b/crates/meilisearch/tests/index/update_index.rs
index 291700728..1c781c386 100644
--- a/crates/meilisearch/tests/index/update_index.rs
+++ b/crates/meilisearch/tests/index/update_index.rs
@@ -12,10 +12,10 @@ async fn update_primary_key() {
     let (task, code) = index.create(None).await;
 
     assert_eq!(code, 202);
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (task, _status_code) = index.update(Some("primary")).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) = index.get().await;
 
@@ -42,12 +42,12 @@ async fn create_and_update_with_different_encoding() {
     let (create_task, code) = index.create(None).await;
 
     assert_eq!(code, 202);
-    index.wait_task(create_task.uid()).await.succeeded();
+    server.wait_task(create_task.uid()).await.succeeded();
 
     let index = index.with_encoder(Encoder::Brotli);
     let (task, _status_code) = index.update(Some("primary")).await;
 
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 }
 
 #[actix_rt::test]
@@ -58,23 +58,24 @@ async fn update_nothing() {
 
     assert_eq!(code, 202);
 
-    index.wait_task(task1.uid()).await.succeeded();
+    server.wait_task(task1.uid()).await.succeeded();
 
     let (task2, code) = index.update(None).await;
 
     assert_eq!(code, 202);
 
-    index.wait_task(task2.uid()).await.succeeded();
+    server.wait_task(task2.uid()).await.succeeded();
 }
 
 #[actix_rt::test]
 async fn error_update_existing_primary_key() {
+    let server = Server::new_shared();
     let index = shared_index_with_documents().await;
 
     let (update_task, code) = index.update_index_fail(Some("primary")).await;
 
     assert_eq!(code, 202);
 
-    let response = index.wait_task(update_task.uid()).await.failed();
+    let response = server.wait_task(update_task.uid()).await.failed();
 
     let expected_response = json!({
         "message": format!("Index `{}`: Index already has a primary key: `id`.", index.uid),
@@ -88,12 +89,13 @@ async fn error_update_existing_primary_key() {
 
 #[actix_rt::test]
 async fn error_update_unexisting_index() {
+    let server = Server::new_shared();
     let index = shared_does_not_exists_index().await;
     let (task, code) = index.update_index_fail(Some("my-primary-key")).await;
 
     assert_eq!(code, 202);
 
-    let response = index.wait_task(task.uid()).await.failed();
+    let response = server.wait_task(task.uid()).await.failed();
 
     let expected_response = json!({
         "message": format!("Index `{}` not found.", index.uid),
diff --git a/crates/meilisearch/tests/search/distinct.rs b/crates/meilisearch/tests/search/distinct.rs
index bdc5875e0..33a4c5453 100644
--- a/crates/meilisearch/tests/search/distinct.rs
+++ b/crates/meilisearch/tests/search/distinct.rs
@@ -152,7 +152,7 @@ async fn distinct_search_with_offset_no_ranking() {
     let documents = DOCUMENTS.clone();
    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
     let (task, _status_code) = index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     fn get_hits(response: &Value) -> Vec<&str> {
         let hits_array = response["hits"].as_array().unwrap();
@@ -211,7 +211,7 @@ async fn distinct_search_with_pagination_no_ranking() {
     let documents = DOCUMENTS.clone();
    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
     let (task, _status_code) = index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     fn get_hits(response: &Value) -> Vec<&str> {
         let hits_array = response["hits"].as_array().unwrap();
@@ -281,7 +281,7 @@ async fn distinct_at_search_time() {
     let documents = NESTED_DOCUMENTS.clone();
    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
     let (task, _) = index.update_settings_filterable_attributes(json!(["color.main"])).await;
-    let task = index.wait_task(task.uid()).await.succeeded();
+    let task = server.wait_task(task.uid()).await.succeeded();
     snapshot!(task, name: "succeed");
 
     fn get_hits(response: &Value) -> Vec<String> {
diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs
index ba7853d11..363ece067 100644
--- a/crates/meilisearch/tests/search/errors.rs
+++ b/crates/meilisearch/tests/search/errors.rs
@@ -1,10 +1,9 @@
 use meili_snap::*;
 
+use super::test_settings_documents_indexing_swapping_and_search;
 use crate::common::{shared_does_not_exists_index, Server, DOCUMENTS, NESTED_DOCUMENTS};
 use crate::json;
 
-use super::test_settings_documents_indexing_swapping_and_search;
-
 #[actix_rt::test]
 async fn search_unexisting_index() {
     let index = shared_does_not_exists_index().await;
@@ -426,7 +425,7 @@ async fn search_non_filterable_facets() {
     let index = server.unique_index();
     let (response, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
     // Wait for the settings update to complete
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index.search_post(json!({"facets": ["doggo"]})).await;
     snapshot!(code, @"400 Bad Request");
@@ -457,7 +456,7 @@ async fn search_non_filterable_facets_multiple_filterable() {
     let index = server.unique_index();
     let (response, _code) =
         index.update_settings(json!({"filterableAttributes": ["title", "genres"]})).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index.search_post(json!({"facets": ["doggo"]})).await;
     snapshot!(code, @"400 Bad Request");
@@ -487,7 +486,7 @@ async fn search_non_filterable_facets_no_filterable() {
     let server = Server::new_shared();
     let index = server.unique_index();
     let (response, _code) =
         index.update_settings(json!({"filterableAttributes": []})).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index.search_post(json!({"facets": ["doggo"]})).await;
     snapshot!(code, @"400 Bad Request");
@@ -518,7 +517,7 @@ async fn search_non_filterable_facets_multiple_facets() {
     let index = server.unique_index();
     let (response, _uid) =
         index.update_settings(json!({"filterableAttributes": ["title", "genres"]})).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index.search_post(json!({"facets": ["doggo", "neko"]})).await;
     snapshot!(code, @"400 Bad Request");
@@ -1002,7 +1001,7 @@ async fn sort_geo_reserved_attribute() {
     let index = server.unique_index();
     let (task, _code) = index.update_settings(json!({"sortableAttributes": ["id"]})).await;
 
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let expected_response = json!({
         "message": "`_geo` is a reserved keyword and thus can't be used as a sort expression. Use the _geoPoint(latitude, longitude) built-in rule to sort on _geo field coordinates.",
@@ -1029,7 +1028,7 @@ async fn sort_reserved_attribute() {
     let index = server.unique_index();
     let (task, _code) = index.update_settings(json!({"sortableAttributes": ["id"]})).await;
 
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let expected_response = json!({
         "message": "`_geoDistance` is a reserved keyword and thus can't be used as a sort expression.",
@@ -1055,7 +1054,7 @@ async fn sort_unsortable_attribute() {
     let server = Server::new_shared();
     let index = server.unique_index();
     let (response, _code) = index.update_settings(json!({"sortableAttributes": ["id"]})).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let expected_response = json!({
         "message": format!("Index `{}`: Attribute `title` is not sortable. Available sortable attributes are: `id`.", index.uid),
@@ -1082,7 +1081,7 @@ async fn sort_invalid_syntax() {
     let index = server.unique_index();
     let (response, _code) = index.update_settings(json!({"sortableAttributes": ["id"]})).await;
 
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let expected_response = json!({
         "message": "Invalid syntax for the sort parameter: expected expression ending by `:asc` or `:desc`, found `title`.",
@@ -1113,7 +1112,7 @@ async fn sort_unset_ranking_rule() {
             json!({"sortableAttributes": ["title"], "rankingRules": ["proximity", "exactness"]}),
         )
         .await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let expected_response = json!({
         "message": format!("Index `{}`: You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.", index.uid),
@@ -1200,7 +1199,7 @@ async fn distinct_at_search_time() {
     let index = server.unique_index();
     let (response, _code) =
         index.add_documents(json!([{"id": 1, "color": "Doggo", "machin": "Action"}]), None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) =
         index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
@@ -1215,7 +1214,7 @@ async fn distinct_at_search_time() {
     "###);
 
     let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
@@ -1230,7 +1229,7 @@ async fn distinct_at_search_time() {
     "###);
 
     let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs
index 57d2cfcd2..da713fc22 100644
--- a/crates/meilisearch/tests/search/facet_search.rs
+++ b/crates/meilisearch/tests/search/facet_search.rs
@@ -50,11 +50,11 @@ async fn test_settings_documents_indexing_swapping_and_facet_search(
     let (task, code) = index.add_documents(documents.clone(), None).await;
     assert_eq!(code, 202, "{}", task);
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (task, code) = index.update_settings(settings.clone()).await;
     assert_eq!(code, 202, "{}", task);
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) = index.facet_search(query.clone()).await;
     insta::allow_duplicates! {
@@ -70,11 +70,11 @@ async fn test_settings_documents_indexing_swapping_and_facet_search(
 
     let (task, code) = index.update_settings(settings.clone()).await;
     assert_eq!(code, 202, "{}", task);
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (task, code) = index.add_documents(documents.clone(), None).await;
     assert_eq!(code, 202, "{}", task);
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) = index.facet_search(query.clone()).await;
     insta::allow_duplicates! {
@@ -94,7 +94,7 @@ async fn simple_facet_search() {
     let documents = DOCUMENTS.clone();
    index.update_settings_filterable_attributes(json!(["genres"])).await;
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -207,10 +207,10 @@ async fn simple_facet_search_on_movies() {
     let (response, code) =
         index.update_settings_filterable_attributes(json!(["genres", "color"])).await;
     assert_eq!(202, code, "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, _code) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetQuery": "", "facetName": "genres", "q": "" })).await;
@@ -228,7 +228,7 @@ async fn advanced_facet_search() {
    index.update_settings_filterable_attributes(json!(["genres"])).await;
    index.update_settings_typo_tolerance(json!({ "enabled": false })).await;
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await;
@@ -252,7 +252,7 @@ async fn more_advanced_facet_search() {
    index.update_settings_filterable_attributes(json!(["genres"])).await;
    index.update_settings_typo_tolerance(json!({ "disableOnWords": ["adventre"] })).await;
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await;
@@ -276,7 +276,7 @@ async fn simple_facet_search_with_max_values() {
    index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await;
    index.update_settings_filterable_attributes(json!(["genres"])).await;
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -298,7 +298,7 @@ async fn simple_facet_search_by_count_with_max_values() {
         .await;
    index.update_settings_filterable_attributes(json!(["genres"])).await;
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -314,7 +314,7 @@ async fn non_filterable_facet_search_error() {
     let documents = DOCUMENTS.clone();
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -333,7 +333,7 @@ async fn facet_search_dont_support_words() {
     let documents = DOCUMENTS.clone();
    index.update_settings_filterable_attributes(json!(["genres"])).await;
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "words"})).await;
@@ -351,7 +351,7 @@ async fn simple_facet_search_with_sort_by_count() {
    index.update_settings_faceting(json!({ "sortFacetValuesBy": { "*": "count" } })).await;
    index.update_settings_filterable_attributes(json!(["genres"])).await;
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -370,7 +370,7 @@ async fn add_documents_and_deactivate_facet_search() {
 
     let documents = DOCUMENTS.clone();
     let (response, _code) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
     let (response, code) = index
         .update_settings(json!({
             "facetSearch": false,
@@ -378,7 +378,7 @@ async fn add_documents_and_deactivate_facet_search() {
         }))
         .await;
     assert_eq!("202", code.as_str(), "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -406,10 +406,10 @@ async fn deactivate_facet_search_and_add_documents() {
         }))
         .await;
     assert_eq!("202", code.as_str(), "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let documents = DOCUMENTS.clone();
     let (response, _code) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -437,10 +437,10 @@ async fn deactivate_facet_search_add_documents_and_activate_facet_search() {
         }))
         .await;
     assert_eq!("202", code.as_str(), "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let documents = DOCUMENTS.clone();
     let (response, _code) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index
         .update_settings(json!({
@@ -448,7 +448,7 @@ async fn deactivate_facet_search_add_documents_and_activate_facet_search() {
         }))
         .await;
     assert_eq!("202", code.as_str(), "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -469,10 +469,10 @@ async fn deactivate_facet_search_add_documents_and_reset_facet_search() {
         }))
         .await;
     assert_eq!("202", code.as_str(), "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let documents = DOCUMENTS.clone();
     let (response, _code) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index
         .update_settings(json!({
@@ -480,7 +480,7 @@ async fn deactivate_facet_search_add_documents_and_reset_facet_search() {
         }))
         .await;
     assert_eq!("202", code.as_str(), "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
@@ -920,13 +920,13 @@ async fn distinct_facet_search_on_movies() {
     let (response, code) =
         index.update_settings_filterable_attributes(json!(["genres", "color"])).await;
     assert_eq!(202, code, "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index.update_settings_distinct_attribute(json!("color")).await;
     assert_eq!(202, code, "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, _code) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) =
         index.facet_search(json!({"facetQuery": "blob", "facetName": "genres", "q": "" })).await;
diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs
index 9670a036c..ffa025f5c 100644
--- a/crates/meilisearch/tests/search/filters.rs
+++ b/crates/meilisearch/tests/search/filters.rs
@@ -3,13 +3,11 @@
 use meilisearch::Opt;
 use tempfile::TempDir;
 
 use super::test_settings_documents_indexing_swapping_and_search;
-use crate::{
-    common::{
-        default_settings, shared_index_with_documents, shared_index_with_nested_documents, Server,
-        DOCUMENTS, NESTED_DOCUMENTS,
-    },
-    json,
+use crate::common::{
+    default_settings, shared_index_with_documents, shared_index_with_nested_documents, Server,
+    DOCUMENTS, NESTED_DOCUMENTS,
 };
+use crate::json;
 
 #[actix_rt::test]
 async fn search_with_filter_string_notation() {
@@ -92,7 +90,7 @@ async fn search_with_contains_filter() {
     let documents = DOCUMENTS.clone();
     let (request, _code) = index.add_documents(documents, None).await;
-    index.wait_task(request.uid()).await.succeeded();
+    server.wait_task(request.uid()).await.succeeded();
 
     let (response, code) = index
         .search_post(json!({
@@ -259,7 +257,7 @@ async fn search_with_pattern_filter_settings_scenario_1() {
 
     let (task, code) = index.add_documents(NESTED_DOCUMENTS.clone(), None).await;
     assert_eq!(code, 202, "{task}");
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (task, code) = index
         .update_settings(json!({"filterableAttributes": [{
@@ -271,7 +269,7 @@ async fn search_with_pattern_filter_settings_scenario_1() {
         }]}))
         .await;
     assert_eq!(code, 202, "{task}");
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // Check if the Equality filter works
     index
@@ -336,7 +334,7 @@ async fn search_with_pattern_filter_settings_scenario_1() {
         }]}))
         .await;
     assert_eq!(code, 202, "{task}");
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // Check if the Equality filter works
     index
@@ -447,7 +445,7 @@ async fn search_with_pattern_filter_settings_scenario_1() {
         }]}))
         .await;
     assert_eq!(code, 202, "{task}");
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // Check if the Equality filter returns an error
     index
@@ -546,7 +544,7 @@ async fn search_with_pattern_filter_settings_scenario_1() {
         }]}))
         .await;
     assert_eq!(code, 202, "{task}");
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // Check if the Equality filter works
     index
diff --git a/crates/meilisearch/tests/search/formatted.rs b/crates/meilisearch/tests/search/formatted.rs
index 2b9383034..43a59e823 100644
--- a/crates/meilisearch/tests/search/formatted.rs
+++ b/crates/meilisearch/tests/search/formatted.rs
@@ -26,7 +26,7 @@ async fn search_formatted_from_sdk() {
         { "id": 42, "title": "The Hitchhiker's Guide to the Galaxy" }
     ]);
     let (response, _) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await;
+    server.wait_task(response.uid()).await;
 
     index
         .search(
@@ -65,7 +65,7 @@ async fn formatted_contain_wildcard() {
 
     let documents = NESTED_DOCUMENTS.clone();
     let (response, _) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     index.search(json!({ "q": "pésti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"], "showMatchesPosition": true }), |response, code|
@@ -398,7 +398,7 @@ async fn displayedattr_2_smol() {
 
     let documents = NESTED_DOCUMENTS.clone();
     let (response, _) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     index
         .search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
@@ -596,7 +596,7 @@ async fn test_cjk_highlight() {
         { "id": 1, "title": "大卫到了扫罗那里" },
     ]);
     let (response, _) = index.add_documents(documents, None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     index
         .search(json!({"q": "で", "attributesToHighlight": ["title"]}), |response, code| {
diff --git a/crates/meilisearch/tests/search/geo.rs b/crates/meilisearch/tests/search/geo.rs
index 5f4eff947..b241386ed 100644
--- a/crates/meilisearch/tests/search/geo.rs
+++ b/crates/meilisearch/tests/search/geo.rs
@@ -1,11 +1,10 @@
 use meili_snap::{json_string, snapshot};
 use meilisearch_types::milli::constants::RESERVED_GEO_FIELD_NAME;
 
+use super::test_settings_documents_indexing_swapping_and_search;
 use crate::common::shared_index_with_geo_documents;
 use crate::json;
 
-use super::test_settings_documents_indexing_swapping_and_search;
-
 #[actix_rt::test]
 async fn geo_sort_with_geo_strings() {
     let index = shared_index_with_geo_documents().await;
diff --git a/crates/meilisearch/tests/search/hybrid.rs b/crates/meilisearch/tests/search/hybrid.rs
index c6eb39a3a..d95e6fb64 100644
--- a/crates/meilisearch/tests/search/hybrid.rs
+++ b/crates/meilisearch/tests/search/hybrid.rs
@@ -17,11 +17,11 @@ async fn index_with_documents_user_provided<'a>(
         "dimensions": 2}}} ))
         .await;
     assert_eq!(202, code, "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index.add_documents(documents.clone(), None).await;
     assert_eq!(202, code, "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
     index
 }
 
@@ -37,11 +37,11 @@ async fn index_with_documents_hf<'a>(server: &'a Server<Shared>, documents: &Val
         }}} ))
         .await;
     assert_eq!(202, code, "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index.add_documents(documents.clone(), None).await;
     assert_eq!(202, code, "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
     index
 }
 
@@ -499,7 +499,7 @@ async fn query_combination() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
-      "message": "Invalid request: missing `hybrid` parameter when `vector` is present.",
+      "message": "Invalid request: missing `hybrid` parameter when `vector` or `media` are present.",
       "code": "missing_search_hybrid",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#missing_search_hybrid"
@@ -543,7 +543,7 @@ async fn distinct_is_applied() {
 
     let (response, code) = index.update_settings(json!({ "distinctAttribute": "distinct" } )).await;
     assert_eq!(202, code, "{:?}", response);
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     // pure keyword
     let (response, code) = index
@@ -633,7 +633,7 @@ async fn retrieve_vectors() {
         .update_settings(json!({ "displayedAttributes": ["id", "title", "desc", "_vectors"]} ))
         .await;
     assert_eq!(202, code, "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index
         .search_post(
@@ -683,7 +683,7 @@ async fn retrieve_vectors() {
     let (response, code) =
         index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await;
     assert_eq!(202, code, "{response:?}");
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index
         .search_post(
diff --git a/crates/meilisearch/tests/search/locales.rs b/crates/meilisearch/tests/search/locales.rs
index b1c9b2bc2..96c7fc7f5 100644
--- a/crates/meilisearch/tests/search/locales.rs
+++ b/crates/meilisearch/tests/search/locales.rs
@@ -99,7 +99,7 @@ async fn simple_search() {
         )
         .await;
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // english
     index
@@ -147,23 +147,20 @@ async fn simple_search() {
         .search(
             json!({"q": "進撃", "locales": ["jpn"], "attributesToRetrieve": ["id"]}),
             |response, code| {
-                snapshot!(response, @r###"
+                snapshot!(response, @r#"
                 {
                   "hits": [
                     {
                       "id": 852
-                    },
-                    {
-                      "id": 853
                     }
                   ],
                   "query": "進撃",
                   "processingTimeMs": "[duration]",
                   "limit": 20,
                   "offset": 0,
-                  "estimatedTotalHits": 2
+                  "estimatedTotalHits": 1
                 }
-                "###);
+                "#);
                 snapshot!(code, @"200 OK");
             },
         )
@@ -172,23 +169,20 @@ async fn simple_search() {
     // chinese
     index
         .search(json!({"q": "进击", "attributesToRetrieve": ["id"]}), |response, code| {
-            snapshot!(response, @r###"
+            snapshot!(response, @r#"
             {
               "hits": [
                 {
                   "id": 853
-                },
-                {
-                  "id": 852
                 }
               ],
               "query": "进击",
               "processingTimeMs": "[duration]",
               "limit": 20,
               "offset": 0,
-              "estimatedTotalHits": 2
+              "estimatedTotalHits": 1
            }
-            "###);
+            "#);
             snapshot!(code, @"200 OK");
         })
         .await;
@@ -221,7 +215,7 @@ async fn force_locales() {
     }
     "###);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // chinese detection
     index
@@ -299,7 +293,7 @@ async fn force_locales_with_pattern() {
     }
     "###);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // chinese detection
     index
@@ -375,7 +369,7 @@ async fn force_locales_with_pattern_nested() {
     }
     "###);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // chinese
     index
@@ -450,7 +444,7 @@ async fn force_different_locales_with_pattern() {
     }
     "###);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // force chinese
     index
@@ -528,7 +522,7 @@ async fn auto_infer_locales_at_search_with_attributes_to_search_on() {
     }
     "###);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // auto infer any language
     index
@@ -602,7 +596,7 @@ async fn auto_infer_locales_at_search() {
     }
     "###);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(
@@ -701,7 +695,7 @@ async fn force_different_locales_with_pattern_nested() {
     }
     "###);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // chinese
     index
@@ -779,7 +773,7 @@ async fn settings_change() {
 
     let documents = NESTED_DOCUMENTS.clone();
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
     let (response, _) = index
         .update_settings(json!({
             "searchableAttributes": ["document_en", "document_ja", "document_zh"],
@@ -798,7 +792,7 @@ async fn settings_change() {
       "enqueuedAt": "[date]"
     }
     "###);
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     // chinese
     index
@@ -861,7 +855,7 @@ async fn settings_change() {
       "enqueuedAt": "[date]"
     }
     "###);
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     // chinese
     index
@@ -916,7 +910,7 @@ async fn invalid_locales() {
         )
         .await;
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) = index.search_post(json!({"q": "Atta", "locales": ["invalid"]})).await;
     snapshot!(code, @"400 Bad Request");
@@ -1034,7 +1028,7 @@ async fn simple_facet_search() {
     }
     "###);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, _) = index
         .facet_search(json!({"facetName": "name_zh", "facetQuery": "進撃", "locales": ["cmn"]}))
@@ -1096,7 +1090,7 @@ async fn facet_search_with_localized_attributes() {
     }
     "###);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, _) = index
         .facet_search(json!({"facetName": "name_zh", "facetQuery": "进击", "locales": ["cmn"]}))
@@ -1165,7 +1159,7 @@ async fn swedish_search() {
         ]
         }))
         .await;
-    index.wait_task(_response.uid()).await.succeeded();
+    server.wait_task(_response.uid()).await.succeeded();
 
     // infer swedish
     index
@@ -1286,7 +1280,7 @@ async fn german_search() {
         ]
         }))
         .await;
-    index.wait_task(_response.uid()).await.succeeded();
+    server.wait_task(_response.uid()).await.succeeded();
 
     // infer swedish
     index
diff --git a/crates/meilisearch/tests/search/matching_strategy.rs b/crates/meilisearch/tests/search/matching_strategy.rs
index ece320b2a..10b93be76 100644
--- a/crates/meilisearch/tests/search/matching_strategy.rs
+++ b/crates/meilisearch/tests/search/matching_strategy.rs
@@ -9,7 +9,7 @@ async fn index_with_documents<'a>(server: &'a Server<Shared>, documents: &Value)
     let index = server.unique_index();
 
     let (task, _status_code) = index.add_documents(documents.clone(), None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
 }
diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs
index be476da35..3f70e1ba9 100644
--- a/crates/meilisearch/tests/search/mod.rs
+++ b/crates/meilisearch/tests/search/mod.rs
@@ -38,11 +38,11 @@ async fn test_settings_documents_indexing_swapping_and_search(
     let (task, code) = index.add_documents(documents.clone(), None).await;
     assert_eq!(code, 202, "{task}");
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (task, code) = index.update_settings(settings.clone()).await;
     assert_eq!(code, 202, "{task}");
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index.search(query.clone(), test.clone()).await;
 
@@ -51,11 +51,11 @@ async fn test_settings_documents_indexing_swapping_and_search(
 
     let (task, code) = index.update_settings(settings.clone()).await;
     assert_eq!(code, 202, "{task}");
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (task, code) = index.add_documents(documents.clone(), None).await;
     assert_eq!(code, 202, "{task}");
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index.search(query.clone(), test.clone()).await;
 }
@@ -104,7 +104,7 @@ async fn bug_5547() {
     let server = Server::new_shared();
     let index = server.unique_index();
     let (response, _code) = index.create(None).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let mut documents = Vec::new();
     for i in 0..65_535 {
@@ -112,7 +112,7 @@ async fn bug_5547() {
     }
 
     let (response, _code) = index.add_documents(json!(documents), Some("id")).await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, code) = index.search_post(json!({"q": "title"})).await;
     assert_eq!(code, 200);
     snapshot!(response["hits"], @r###"[{"id":0,"title":"title0"},{"id":1,"title":"title1"},{"id":10,"title":"title10"},{"id":100,"title":"title100"},{"id":101,"title":"title101"},{"id":102,"title":"title102"},{"id":103,"title":"title103"},{"id":104,"title":"title104"},{"id":105,"title":"title105"},{"id":106,"title":"title106"},{"id":107,"title":"title107"},{"id":108,"title":"title108"},{"id":1000,"title":"title1000"},{"id":1001,"title":"title1001"},{"id":1002,"title":"title1002"},{"id":1003,"title":"title1003"},{"id":1004,"title":"title1004"},{"id":1005,"title":"title1005"},{"id":1006,"title":"title1006"},{"id":1007,"title":"title1007"}]"###);
@@ -131,7 +131,7 @@ async fn search_with_stop_word() {
 
     let documents = DOCUMENTS.clone();
     let (task, _code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // prefix search
     index
@@ -196,7 +196,7 @@ async fn search_with_typo_settings() {
 
     let documents = DOCUMENTS.clone();
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(json!({"q": "287947" }), |response, code| {
@@ -228,7 +228,7 @@ async fn phrase_search_with_stop_word() {
 
     let documents = DOCUMENTS.clone();
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(json!({"q": "how \"to\" train \"the" }), |response, code| {
@@ -308,11 +308,11 @@ async fn negative_special_cases_search() {
 
     let documents = DOCUMENTS.clone();
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (task, _status_code) =
         index.update_settings(json!({"synonyms": { "escape": ["gläss"] }})).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // There is a synonym for escape -> glass but we don't want "escape", only the derivates: glass
     index
@@ -338,7 +338,7 @@ async fn test_kanji_language_detection() {
         { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
     ]);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(json!({"q": "東京"}), |response, code| {
@@ -361,10 +361,10 @@ async fn test_thai_language() {
         { "id": 2, "title": "สบู่สมุนไพรฝางแดงผสมว่านหางจรเข้ 100 กรัม จำนวน 6 ก้อน" }
     ]);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (task, _status_code) = index.update_settings(json!({"rankingRules": ["exactness"]})).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(json!({"q": "สบู"}), |response, code| {
@@ -586,7 +586,7 @@ async fn displayed_attributes() {
 
     let documents = DOCUMENTS.clone();
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (response, code) =
         index.search_post(json!({ "attributesToRetrieve": ["title", "id"] })).await;
@@ -601,7 +601,7 @@ async fn placeholder_search_is_hard_limited() {
 
     let documents: Vec<_> = (0..1200).map(|i| json!({ "id": i, "text": "I am unique!" })).collect();
     let (task, _status_code) = index.add_documents(documents.into(), None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(
@@ -630,7 +630,7 @@ async fn placeholder_search_is_hard_limited() {
 
     let (task, _status_code) =
         index.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(
@@ -665,7 +665,7 @@ async fn search_is_hard_limited() {
 
     let documents: Vec<_> = (0..1200).map(|i| json!({ "id": i, "text": "I am unique!" })).collect();
     let (task, _status_code) = index.add_documents(documents.into(), None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(
@@ -696,7 +696,7 @@ async fn search_is_hard_limited() {
 
     let (task, _status_code) =
         index.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(
@@ -735,7 +735,7 @@ async fn faceting_max_values_per_facet() {
 
     let documents: Vec<_> = (0..10_000).map(|id| json!({ "id": id, "number": id * 10 })).collect();
     let (task, _status_code) = index.add_documents(json!(documents), None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(
@@ -752,7 +752,7 @@ async fn faceting_max_values_per_facet() {
 
     let (task, _status_code) =
         index.update_settings(json!({ "faceting": { "maxValuesPerFacet": 10_000 } })).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(
@@ -1033,7 +1033,7 @@ async fn test_degraded_score_details() {
    index.add_documents(json!(documents), None).await;
     // We can't really use anything else than 0ms here; otherwise, the test will get flaky.
     let (res, _code) = index.update_settings(json!({ "searchCutoffMs": 0 })).await;
-    index.wait_task(res.uid()).await.succeeded();
+    server.wait_task(res.uid()).await.succeeded();
 
     index
         .search(
@@ -1126,7 +1126,7 @@ async fn camelcased_words() {
         { "id": 4, "title": "testab" },
     ]);
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(json!({"q": "deLonghi"}), |response, code| {
@@ -1345,12 +1345,12 @@ async fn simple_search_with_strange_synonyms() {
 
     let (task, _status_code) =
         index.update_settings(json!({ "synonyms": {"&": ["to"], "to": ["&"]} })).await;
-    let r = index.wait_task(task.uid()).await.succeeded();
+    let r = server.wait_task(task.uid()).await.succeeded();
     snapshot!(r["status"], @r###""succeeded""###);
 
     let documents = DOCUMENTS.clone();
     let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     index
         .search(json!({"q": "How to train"}), |response, code| {
@@ -1416,11 +1416,11 @@ async fn change_attributes_settings() {
 
     let documents = NESTED_DOCUMENTS.clone();
     let (task, _status_code) = index.add_documents(json!(documents), None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     let (task,_status_code) =
        index.update_settings(json!({ "searchableAttributes": ["father", "mother", "doggos"], "filterableAttributes": ["doggos"] })).await;
-    index.wait_task(task.uid()).await.succeeded();
+    server.wait_task(task.uid()).await.succeeded();
 
     // search
     index
@@ -1923,7 +1923,7 @@ async fn change_facet_casing() {
         }))
         .await;
     assert_eq!("202", code.as_str(), "{:?}", response);
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, _code) = index
         .add_documents(
@@ -1936,7 +1936,7 @@ async fn change_facet_casing() {
             None,
         )
         .await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     let (response, _code) = index
         .add_documents(
@@ -1949,7 +1949,7 @@ async fn change_facet_casing() {
             None,
         )
         .await;
-    index.wait_task(response.uid()).await.succeeded();
+    server.wait_task(response.uid()).await.succeeded();
 
     index
         .search(json!({ "facets": ["dog"] }), |response, code| {
@@ -2054,3 +2054,76 @@ async fn test_exact_typos_terms() {
         )
         .await;
 }
+
+#[actix_rt::test]
+async fn simple_search_changing_unrelated_settings() {
+    let server = Server::new_shared();
+    let index = server.unique_index();
+
+    let documents = DOCUMENTS.clone();
+    let (task, _status_code) = index.add_documents(documents, None).await;
+    server.wait_task(task.uid()).await.succeeded();
+
+    index
+        .search(json!({"q": "Dragon"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428",
+                "color": [
+                  "green",
+                  "red"
+                ]
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    let (task, _status_code) =
+        index.update_settings(json!({ "filterableAttributes": ["title"] })).await;
+    let r = server.wait_task(task.uid()).await.succeeded();
+    snapshot!(r["status"], @r###""succeeded""###);
+
+    index
+        .search(json!({"q": "Dragon"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428",
+                "color": [
+                  "green",
+                  "red"
+                ]
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    let (task, _status_code) = index.update_settings(json!({ "filterableAttributes": [] })).await;
+    let r = server.wait_task(task.uid()).await.succeeded();
+    snapshot!(r["status"], @r###""succeeded""###);
+
+    index
+        .search(json!({"q": "Dragon"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428",
+                "color": [
+                  "green",
+                  "red"
+                ]
+              }
+            ]
+            "###);
+        })
+        .await;
+}
diff --git a/crates/meilisearch/tests/search/multi/mod.rs b/crates/meilisearch/tests/search/multi/mod.rs
index 8a83fd3c0..b9eed56da 100644
--- a/crates/meilisearch/tests/search/multi/mod.rs
+++ b/crates/meilisearch/tests/search/multi/mod.rs
@@ -1,15 +1,82 @@
 use meili_snap::{json_string, snapshot};
+use tokio::sync::OnceCell;
 
 use super::{DOCUMENTS, FRUITS_DOCUMENTS, NESTED_DOCUMENTS};
-use crate::common::Server;
+use crate::common::index::Index;
+use crate::common::{
+    shared_index_with_documents, shared_index_with_nested_documents,
+    shared_index_with_score_documents, Server, Shared,
+};
 use crate::json;
 use crate::search::{SCORE_DOCUMENTS, VECTOR_DOCUMENTS};
 
 mod proxy;
 
+pub async fn shared_movies_index() -> &'static Index<'static, Shared> {
+    static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
+    INDEX
+        .get_or_init(|| async {
+            let server = Server::new_shared();
+            let movies_index = server.unique_index_with_prefix("movies");
+
+            let documents = DOCUMENTS.clone();
+            let (response, _code) = movies_index.add_documents(documents, None).await;
+            server.wait_task(response.uid()).await.succeeded();
+
+            let (value, _) = movies_index
+                .update_settings(json!({
+                    "sortableAttributes": ["title"],
+                    "filterableAttributes": ["title", "color"],
+                    "rankingRules": [
+                        "sort",
+                        "words",
+                        "typo",
+                        "proximity",
+                        "attribute",
+                        "exactness"
+                    ]
+                }))
+                .await;
+            server.wait_task(value.uid()).await.succeeded();
+            movies_index.to_shared()
+        })
+        .await
+}
+
+pub async fn shared_batman_index() -> &'static Index<'static, Shared> {
+    static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
+    INDEX
+        .get_or_init(|| async {
+            let server = Server::new_shared();
+            let batman_index = server.unique_index_with_prefix("batman");
+
+            let documents = SCORE_DOCUMENTS.clone();
+            let (response, _code) = batman_index.add_documents(documents, None).await;
+            server.wait_task(response.uid()).await.succeeded();
+
+            let (value, _) = batman_index
+                .update_settings(json!({
+                    "sortableAttributes": ["id", "title"],
+                    "filterableAttributes": ["title"],
+                    "rankingRules": [
+                        "sort",
+                        "words",
+                        "typo",
+                        "proximity",
+                        "attribute",
+                        "exactness"
+                    ]
+                }))
+                .await;
+            server.wait_task(value.uid()).await.succeeded();
+            batman_index.to_shared()
+        })
+        .await
+}
+
 #[actix_rt::test]
 async fn search_empty_list() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
 
     let (response, code) = server.multi_search(json!({"queries": []})).await;
     snapshot!(code, @"200 OK");
@@ -22,14 +89,14 @@ async fn search_empty_list() {
 
 #[actix_rt::test]
 async fn federation_empty_list() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
 
     let (response, code) = server.multi_search(json!({"federation": {}, "queries": []})).await;
 
     snapshot!(code, @"200 OK");
-    snapshot!(json_string!(response, {".processingTimeMs" => "[time]"}), @r###"
+    snapshot!(response, @r###"
     {
       "hits": [],
-      "processingTimeMs": "[time]",
+      "processingTimeMs": "[duration]",
       "limit": 20,
       "offset": 0,
       "estimatedTotalHits": 0
@@ -39,7 +106,7 @@ async fn federation_empty_list() {
 
 #[actix_rt::test]
 async fn search_json_object() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
 
     let (response, code) = server.multi_search(json!({})).await;
     snapshot!(code, @"400 Bad Request");
@@ -55,7 +122,7 @@ async fn search_json_object() {
 
 #[actix_rt::test]
 async fn federation_no_queries() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
 
     let (response, code) = server.multi_search(json!({"federation": {}})).await;
     snapshot!(code, @"400 Bad Request");
@@ -71,7 +138,7 @@ async fn federation_no_queries() {
 
 #[actix_rt::test]
 async fn search_json_array() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
 
     let (response, code) = server.multi_search(json!([])).await;
     snapshot!(code, @"400 Bad Request");
@@ -87,24 +154,20 @@ async fn search_json_array() {
 
 #[actix_rt::test]
 async fn simple_search_single_index() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    let documents = DOCUMENTS.clone();
-    let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    let server = Server::new_shared();
+    let index = shared_index_with_documents().await;
 
     let (response, code) = server
         .multi_search(json!({"queries": [
-        {"indexUid": "test", "q": "glass"},
-        {"indexUid": "test", "q": "captain"},
+        {"indexUid": index.uid, "q": "glass"},
+        {"indexUid": index.uid, "q": "captain"},
         ]}))
         .await;
     snapshot!(code, @"200 OK");
-    insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
+    snapshot!(json_string!(response["results"], { ".**.processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###"
     [
       {
-        "indexUid": "test",
+        "indexUid": "SHARED_DOCUMENTS",
        "hits": [
          {
            "title": "Gläss",
@@ -116,13 +179,13 @@ async fn simple_search_single_index() {
          }
        ],
        "query": "glass",
-        "processingTimeMs": "[time]",
+        "processingTimeMs": "[duration]",
        "limit": 20,
        "offset": 0,
        "estimatedTotalHits": 1
      },
      {
-        "indexUid": "test",
+        "indexUid": "SHARED_DOCUMENTS",
        "hits": [
          {
            "title": "Captain Marvel",
@@ -134,7 +197,7 @@ async fn simple_search_single_index() {
          }
        ],
        "query": "captain",
-        "processingTimeMs": "[time]",
+        "processingTimeMs": "[duration]",
        "limit": 20,
        "offset": 0,
        "estimatedTotalHits": 1
@@ -145,20 +208,16 @@ async fn simple_search_single_index() {
 
 #[actix_rt::test]
 async fn federation_single_search_single_index() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    let documents = DOCUMENTS.clone();
-    let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    let server = Server::new_shared();
+    let index = shared_index_with_documents().await;
 
     let (response, code) = server
         .multi_search(json!({"federation": {}, "queries": [
-        {"indexUid" : "test", "q": "glass"},
+        {"indexUid" : index.uid, "q": "glass"},
         ]}))
         .await;
     snapshot!(code, @"200 OK");
-    insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###"
     {
       "hits": [
         {
@@ -169,13 +228,13 @@ async fn federation_single_search_single_index() {
            "red"
          ],
          "_federation": {
-            "indexUid": "test",
+            "indexUid": "SHARED_DOCUMENTS",
            "queriesPosition": 0,
            "weightedRankingScore": 1.0
          }
        }
      ],
-      "processingTimeMs": "[time]",
+      "processingTimeMs": "[duration]",
      "limit": 20,
      "offset": 0,
      "estimatedTotalHits": 1
@@ -185,30 +244,26 @@ async fn federation_single_search_single_index() {
 
 #[actix_rt::test]
 async fn federation_multiple_search_single_index() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    let documents = SCORE_DOCUMENTS.clone();
-    let (task, _status_code) = index.add_documents(documents, None).await;
-    index.wait_task(task.uid()).await.succeeded();
+    let server = Server::new_shared();
+    let index = shared_index_with_score_documents().await;
 
     let (response, code) = server
         .multi_search(json!({"federation": {}, "queries": [
-        {"indexUid": "test", "q": "the bat"},
-        {"indexUid": "test", "q": "badman returns"},
-        {"indexUid" : "test", "q": "batman"},
-        {"indexUid": "test", "q": "batman returns"},
+        {"indexUid": index.uid, "q": "the bat"},
+        {"indexUid": index.uid, "q": "badman returns"},
+        {"indexUid" : index.uid, "q": "batman"},
+        {"indexUid": index.uid, "q": "batman returns"},
         ]}))
         .await;
     snapshot!(code, @"200 OK");
-    insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###"
     {
       "hits": [
         {
          "title": "Batman",
          "id": "D",
          "_federation": {
-            "indexUid": "test",
+            "indexUid": "SHARED_SCORE_DOCUMENTS",
            "queriesPosition": 2,
            "weightedRankingScore": 1.0
          }
@@ -217,7 +272,7 @@ async fn federation_multiple_search_single_index() {
          "title": "Batman Returns",
          "id": "C",
          "_federation": {
-            "indexUid": "test",
+            "indexUid": "SHARED_SCORE_DOCUMENTS",
            "queriesPosition": 3,
            "weightedRankingScore": 1.0
          }
@@ -226,7 +281,7 @@ async fn federation_multiple_search_single_index() {
          "title": "Batman the dark knight returns: Part 1",
          "id": "A",
          "_federation": {
-            "indexUid": "test",
+            "indexUid": "SHARED_SCORE_DOCUMENTS",
            "queriesPosition": 2,
            "weightedRankingScore": 0.9848484848484848
          }
@@ -235,7 +290,7 @@ async fn federation_multiple_search_single_index() {
          "title": "Batman the dark knight returns: Part 2",
          "id": "B",
          "_federation": {
-            "indexUid": "test",
+            "indexUid": "SHARED_SCORE_DOCUMENTS",
"queriesPosition": 2, "weightedRankingScore": 0.9848484848484848 } @@ -244,13 +299,13 @@ async fn federation_multiple_search_single_index() { "title": "Badman", "id": "E", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 0.5 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 5 @@ -260,21 +315,17 @@ async fn federation_multiple_search_single_index() { #[actix_rt::test] async fn federation_two_search_single_index() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "test", "q": "glass"}, - {"indexUid": "test", "q": "captain"}, + {"indexUid" : index.uid, "q": "glass"}, + {"indexUid": index.uid, "q": "captain"}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { @@ -285,7 +336,7 @@ async fn federation_two_search_single_index() { "red" ], "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -298,13 +349,13 @@ async fn federation_two_search_single_index() { "blue" ], "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 0.9848484848484848 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 2 @@ -314,12 +365,7 @@ async fn federation_two_search_single_index() { #[actix_rt::test] async fn simple_search_missing_index_uid() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); let (response, code) = server .multi_search(json!({"queries": [ @@ -327,7 +373,7 @@ async fn simple_search_missing_index_uid() { ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, @r###" + snapshot!(response, @r###" { "message": "Missing field `indexUid` inside `.queries[0]`", "code": "missing_index_uid", @@ -339,12 +385,7 @@ async fn simple_search_missing_index_uid() { #[actix_rt::test] async fn federation_simple_search_missing_index_uid() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -352,7 +393,7 @@ async fn federation_simple_search_missing_index_uid() { ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, @r###" + snapshot!(response, @r###" { "message": "Missing field `indexUid` inside `.queries[0]`", "code": "missing_index_uid", @@ -364,12 +405,7 @@ async fn 
federation_simple_search_missing_index_uid() { #[actix_rt::test] async fn simple_search_illegal_index_uid() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); let (response, code) = server .multi_search(json!({"queries": [ @@ -377,7 +413,7 @@ async fn simple_search_illegal_index_uid() { ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, @r###" + snapshot!(response, @r###" { "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", @@ -389,12 +425,7 @@ async fn simple_search_illegal_index_uid() { #[actix_rt::test] async fn federation_search_illegal_index_uid() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -402,7 +433,7 @@ async fn federation_search_illegal_index_uid() { ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, @r###" + snapshot!(response, @r###" { "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", @@ -414,29 +445,22 @@ async fn federation_search_illegal_index_uid() { #[actix_rt::test] async fn simple_search_two_indexes() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (add_task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(add_task.uid()).await.succeeded(); + let nested_index = shared_index_with_nested_documents().await; let (response, code) = server .multi_search(json!({"queries": [ - {"indexUid" : "test", "q": "glass"}, - {"indexUid": "nested", "q": "pésti"}, + {"indexUid" : index.uid, "q": "glass"}, + {"indexUid": nested_index.uid, "q": "pésti"}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response["results"], { ".**.processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" [ { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "hits": [ { "title": "Gläss", @@ -448,13 +472,13 @@ async fn simple_search_two_indexes() { } ], "query": "glass", - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 1 }, { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "hits": [ { "id": 852, @@ -489,7 +513,7 @@ async fn 
simple_search_two_indexes() { } ], "query": "pésti", - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 2 @@ -500,26 +524,18 @@ async fn simple_search_two_indexes() { #[actix_rt::test] async fn federation_two_search_two_indexes() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; + let nested_index = shared_index_with_nested_documents().await; let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "test", "q": "glass"}, - {"indexUid": "nested", "q": "pésti"}, + {"indexUid" : index.uid, "q": "glass"}, + {"indexUid": nested_index.uid, "q": "pésti"}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { @@ -530,7 +546,7 @@ async fn federation_two_search_two_indexes() { "red" ], "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -551,7 +567,7 @@ async fn federation_two_search_two_indexes() { ], "cattos": "pésti", "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -571,13 +587,13 @@ async fn federation_two_search_two_indexes() { "pestiféré" ], "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 0.7803030303030303 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 3 @@ -587,40 +603,30 @@ async fn federation_two_search_two_indexes() { #[actix_rt::test] async fn federation_multiple_search_multiple_indexes() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let nested_index = shared_index_with_nested_documents().await; - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("score"); - let documents = SCORE_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let score_index = shared_index_with_score_documents().await; let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "test", "q": "glass"}, - {"indexUid" : "test", "q": "captain"}, - {"indexUid": "nested", "q": "pésti"}, - {"indexUid" : "test", "q": "Escape"}, - {"indexUid": "nested", "q": "jean"}, - {"indexUid": "score", "q": "jean"}, - {"indexUid": "test", 
"q": "the bat"}, - {"indexUid": "score", "q": "the bat"}, - {"indexUid": "score", "q": "badman returns"}, - {"indexUid" : "score", "q": "batman"}, - {"indexUid": "score", "q": "batman returns"}, + {"indexUid" : index.uid, "q": "glass"}, + {"indexUid" : index.uid, "q": "captain"}, + {"indexUid": nested_index.uid, "q": "pésti"}, + {"indexUid" : index.uid, "q": "Escape"}, + {"indexUid": nested_index.uid, "q": "jean"}, + {"indexUid": score_index.uid, "q": "jean"}, + {"indexUid": index.uid, "q": "the bat"}, + {"indexUid": score_index.uid, "q": "the bat"}, + {"indexUid": score_index.uid, "q": "badman returns"}, + {"indexUid" : score_index.uid, "q": "batman"}, + {"indexUid": score_index.uid, "q": "batman returns"}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { @@ -631,7 +637,7 @@ async fn federation_multiple_search_multiple_indexes() { "red" ], "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -652,7 +658,7 @@ async fn federation_multiple_search_multiple_indexes() { ], "cattos": "pésti", "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -661,7 +667,7 @@ async fn federation_multiple_search_multiple_indexes() { "title": "Batman", "id": "D", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 1.0 } @@ -670,7 +676,7 @@ async fn federation_multiple_search_multiple_indexes() { "title": "Batman Returns", "id": "C", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 10, "weightedRankingScore": 1.0 } @@ -683,7 +689,7 @@ async fn federation_multiple_search_multiple_indexes() { "blue" ], "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 0.9848484848484848 } @@ -696,7 +702,7 @@ async fn federation_multiple_search_multiple_indexes() { "red" ], "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 3, "weightedRankingScore": 0.9848484848484848 } @@ -720,7 +726,7 @@ async fn federation_multiple_search_multiple_indexes() { "gomez" ], "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 4, "weightedRankingScore": 0.9848484848484848 } @@ -729,7 +735,7 @@ async fn federation_multiple_search_multiple_indexes() { "title": "Batman the dark knight returns: Part 1", "id": "A", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 0.9848484848484848 } @@ -738,7 +744,7 @@ async fn federation_multiple_search_multiple_indexes() { "title": "Batman the dark knight returns: Part 2", "id": "B", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 0.9848484848484848 } @@ -758,7 +764,7 @@ async fn federation_multiple_search_multiple_indexes() { "pestiféré" ], "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 2, "weightedRankingScore": 0.7803030303030303 } @@ -767,7 +773,7 @@ async fn federation_multiple_search_multiple_indexes() { "title": "Badman", "id": "E", 
"_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 8, "weightedRankingScore": 0.5 } @@ -780,13 +786,13 @@ async fn federation_multiple_search_multiple_indexes() { "red" ], "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 6, "weightedRankingScore": 0.4166666666666667 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 12 @@ -796,16 +802,12 @@ async fn federation_multiple_search_multiple_indexes() { #[actix_rt::test] async fn search_one_index_doesnt_exist() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; let (response, code) = server .multi_search(json!({"queries": [ - {"indexUid" : "test", "q": "glass"}, + {"indexUid" : index.uid, "q": "glass"}, {"indexUid": "nested", "q": "pésti"}, ]})) .await; @@ -822,16 +824,12 @@ async fn search_one_index_doesnt_exist() { #[actix_rt::test] async fn federation_one_index_doesnt_exist() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "test", "q": "glass"}, + {"indexUid" : index.uid, "q": "glass"}, {"indexUid": "nested", "q": "pésti"}, ]})) .await; @@ -848,7 +846,7 @@ async fn federation_one_index_doesnt_exist() { #[actix_rt::test] async fn search_multiple_indexes_dont_exist() { - let server = Server::new().await; + let server = Server::new_shared(); let (response, code) = server .multi_search(json!({"queries": [ @@ -869,12 +867,15 @@ async fn search_multiple_indexes_dont_exist() { #[actix_rt::test] async fn federation_multiple_indexes_dont_exist() { - let server = Server::new().await; + let server = Server::new_shared(); + + let index_1 = server.unique_index_with_prefix("index_1"); + let index_2 = server.unique_index_with_prefix("index_2"); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "test", "q": "glass"}, - {"indexUid": "nested", "q": "pésti"}, + {"indexUid" : index_1.uid, "q": "glass"}, + {"indexUid": index_2.uid, "q": "pésti"}, ]})) .await; snapshot!(code, @"400 Bad Request"); @@ -882,7 +883,7 @@ async fn federation_multiple_indexes_dont_exist() { // the query index is the lowest index with that index snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: Index `nested` not found.", + "message": "Inside `.queries[0]`: Index `index_1-[uuid]` not found.", "code": "index_not_found", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#index_not_found" @@ -892,29 +893,20 @@ async fn federation_multiple_indexes_dont_exist() { #[actix_rt::test] async fn search_one_query_error() { - let server = Server::new().await; - - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = 
NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; + let nested_index = shared_index_with_nested_documents().await; let (response, code) = server .multi_search(json!({"queries": [ - {"indexUid" : "test", "q": "glass", "facets": ["title"]}, - {"indexUid": "nested", "q": "pésti"}, + {"indexUid" : index.uid, "q": "glass", "facets": ["color"]}, + {"indexUid": nested_index.uid, "q": "pésti"}, ]})) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[0]`: Invalid facet distribution: Attribute `title` is not filterable. This index does not have configured filterable attributes.", + "message": "Inside `.queries[0]`: Invalid facet distribution: Attribute `color` is not filterable. Available filterable attributes patterns are: `id, title`.", "code": "invalid_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_facets" @@ -924,29 +916,21 @@ async fn search_one_query_error() { #[actix_rt::test] async fn federation_one_query_error() { - let server = Server::new().await; + let server = Server::new_shared(); + let index = shared_index_with_documents().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let nested_index = shared_index_with_nested_documents().await; let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "test", "q": "glass"}, - {"indexUid": "nested", "q": "pésti", "filter": ["title = toto"]}, + {"indexUid" : index.uid, "q": "glass"}, + {"indexUid": nested_index.uid, "q": "pésti", "filter": ["title = toto"]}, ]})) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: Index `nested`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto", + "message": "Inside `.queries[1]`: Index `SHARED_NESTED_DOCUMENTS`: Attribute `title` is not filterable. 
Available filterable attribute patterns are: `cattos`, `doggos`, `father`.\n1:6 title = toto", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -956,29 +940,20 @@ async fn federation_one_query_error() { #[actix_rt::test] async fn federation_one_query_sort_error() { - let server = Server::new().await; - - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; + let nested_index = shared_index_with_nested_documents().await; let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "test", "q": "glass"}, - {"indexUid": "nested", "q": "pésti", "sort": ["doggos:desc"]}, + {"indexUid" : index.uid, "q": "glass"}, + {"indexUid": nested_index.uid, "q": "pésti", "sort": ["mother:desc"]}, ]})) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.", + "message": "Inside `.queries[1]`: Index `SHARED_NESTED_DOCUMENTS`: Attribute `mother` is not sortable. Available sortable attributes are: `doggos`.", "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" @@ -988,29 +963,20 @@ async fn federation_one_query_sort_error() { #[actix_rt::test] async fn search_multiple_query_errors() { - let server = Server::new().await; - - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; + let nested_index = shared_index_with_nested_documents().await; let (response, code) = server .multi_search(json!({"queries": [ - {"indexUid" : "test", "q": "glass", "facets": ["title"]}, - {"indexUid": "nested", "q": "pésti", "facets": ["doggos"]}, + {"indexUid" : index.uid, "q": "glass", "facets": ["color"]}, + {"indexUid": nested_index.uid, "q": "pésti", "facets": ["doggos"]}, ]})) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[0]`: Invalid facet distribution: Attribute `title` is not filterable. This index does not have configured filterable attributes.", + "message": "Inside `.queries[0]`: Invalid facet distribution: Attribute `color` is not filterable. 
Available filterable attributes patterns are: `id, title`.", "code": "invalid_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_facets" @@ -1020,29 +986,20 @@ async fn search_multiple_query_errors() { #[actix_rt::test] async fn federation_multiple_query_errors() { - let server = Server::new().await; - - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; + let nested_index = shared_index_with_nested_documents().await; let (response, code) = server .multi_search(json!({"queries": [ - {"indexUid" : "test", "q": "glass", "filter": ["title = toto"]}, - {"indexUid": "nested", "q": "pésti", "filter": ["doggos IN [intel, kefir]"]}, + {"indexUid" : index.uid, "q": "glass", "filter": ["color = toto"]}, + {"indexUid": nested_index.uid, "q": "pésti", "filter": ["mother IN [intel, kefir]"]}, ]})) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[0]`: Index `test`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto", + "message": "Inside `.queries[0]`: Index `SHARED_DOCUMENTS`: Attribute `color` is not filterable. Available filterable attribute patterns are: `id`, `title`.\n1:6 color = toto", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -1052,29 +1009,20 @@ async fn federation_multiple_query_errors() { #[actix_rt::test] async fn federation_multiple_query_sort_errors() { - let server = Server::new().await; - - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; + let nested_index = shared_index_with_nested_documents().await; let (response, code) = server .multi_search(json!({"queries": [ - {"indexUid" : "test", "q": "glass", "sort": ["title:desc"]}, - {"indexUid": "nested", "q": "pésti", "sort": ["doggos:desc"]}, + {"indexUid" : index.uid, "q": "glass", "sort": ["color:desc"]}, + {"indexUid": nested_index.uid, "q": "pésti", "sort": ["doggos:desc"]}, ]})) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[0]`: Index `test`: Attribute `title` is not sortable. This index does not have configured sortable attributes.", + "message": "Inside `.queries[0]`: Index `SHARED_DOCUMENTS`: Attribute `color` is not sortable. 
Available sortable attributes are: `id, title`.", "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" @@ -1084,30 +1032,21 @@ async fn federation_multiple_query_sort_errors() { #[actix_rt::test] async fn federation_multiple_query_errors_interleaved() { - let server = Server::new().await; - - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; + let nested_index = shared_index_with_nested_documents().await; let (response, code) = server .multi_search(json!({"queries": [ - {"indexUid" : "test", "q": "glass"}, - {"indexUid": "nested", "q": "pésti", "filter": ["doggos IN [intel, kefir]"]}, - {"indexUid" : "test", "q": "glass", "filter": ["title = toto"]}, + {"indexUid" : index.uid, "q": "glass"}, + {"indexUid": nested_index.uid, "q": "pésti", "filter": ["mother IN [intel, kefir]"]}, + {"indexUid" : index.uid, "q": "glass", "filter": ["title = toto"]}, ]})) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not filterable. This index does not have configured filterable attributes.\n1:7 doggos IN [intel, kefir]", + "message": "Inside `.queries[1]`: Index `SHARED_NESTED_DOCUMENTS`: Attribute `mother` is not filterable. Available filterable attribute patterns are: `cattos`, `doggos`, `father`.\n1:7 mother IN [intel, kefir]", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -1117,30 +1056,21 @@ async fn federation_multiple_query_errors_interleaved() { #[actix_rt::test] async fn federation_multiple_query_sort_errors_interleaved() { - let server = Server::new().await; - - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; + let nested_index = shared_index_with_nested_documents().await; let (response, code) = server .multi_search(json!({"queries": [ - {"indexUid" : "test", "q": "glass"}, - {"indexUid": "nested", "q": "pésti", "sort": ["doggos:desc"]}, - {"indexUid" : "test", "q": "glass", "sort": ["title:desc"]}, + {"indexUid" : index.uid, "q": "glass"}, + {"indexUid": nested_index.uid, "q": "pésti", "sort": ["mother:desc"]}, + {"indexUid" : index.uid, "q": "glass", "sort": ["title:desc"]}, ]})) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.", + "message": "Inside `.queries[1]`: Index `SHARED_NESTED_DOCUMENTS`: Attribute `mother` is not sortable. 
Available sortable attributes are: `doggos`.", "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" @@ -1150,29 +1080,28 @@ async fn federation_multiple_query_sort_errors_interleaved() { #[actix_rt::test] async fn federation_filter() { - let server = Server::new().await; - - let index = server.index("fruits"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), ) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "fruits", "q": "apple red", "filter": "BOOST = true", "showRankingScore": true, "federationOptions": {"weight": 3.0}}, - {"indexUid": "fruits", "q": "apple red", "showRankingScore": true}, + {"indexUid" : index.uid, "q": "apple red", "filter": "BOOST = true", "showRankingScore": true, "federationOptions": {"weight": 3.0}}, + {"indexUid": index.uid, "q": "apple red", "showRankingScore": true}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(response, @r###" { "hits": [ { @@ -1180,7 +1109,7 @@ async fn federation_filter() { "id": "red-delicious-boosted", "BOOST": true, "_federation": { - "indexUid": "fruits", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 2.7281746031746033 }, @@ -1191,7 +1120,7 @@ async fn federation_filter() { "id": "green-apple-boosted", "BOOST": true, "_federation": { - "indexUid": "fruits", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.318181818181818 }, @@ -1201,14 +1130,14 @@ async fn federation_filter() { "name": "Red apple gala", "id": "red-apple-gala", "_federation": { - "indexUid": "fruits", + "indexUid": "[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.953042328042328 }, "_rankingScore": 0.953042328042328 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 3 @@ -1218,13 +1147,12 @@ async fn federation_filter() { #[actix_rt::test] async fn federation_sort_same_indexes_same_criterion_same_direction() { - let server = Server::new().await; - - let index = server.index("nested"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = NESTED_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -1239,17 +1167,17 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { ] })) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); - // two identical placeholder search should have all results from first query + // two identical placeholder searches should have all results from the first query let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "nested", "q": "", "sort": ["mother:asc"], "showRankingScore": true }, - {"indexUid" : "nested", "q": "", "sort": 
["mother:asc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "", "sort": ["mother:asc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { @@ -1268,7 +1196,7 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { ], "cattos": "pésti", "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -1282,7 +1210,7 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { "enigma" ], "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -1303,7 +1231,7 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { "pestiféré" ], "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -1328,14 +1256,14 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { "gomez" ], "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, "_rankingScore": 1.0 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 4 @@ -1345,12 +1273,12 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { // mix and match query let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "nested", "q": "pésti", "sort": ["mother:asc"], "showRankingScore": true }, - {"indexUid" : "nested", "q": "jean", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "pésti", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "jean", "sort": ["mother:asc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { @@ -1369,7 +1297,7 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { ], "cattos": "pésti", "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -1390,7 +1318,7 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { "pestiféré" ], "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.7803030303030303 }, @@ -1415,14 +1343,14 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { "gomez" ], "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9848484848484848 }, "_rankingScore": 0.9848484848484848 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 3 @@ -1432,13 +1360,12 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { #[actix_rt::test] async fn federation_sort_same_indexes_same_criterion_opposite_direction() { - let server = Server::new().await; - - let index = server.index("nested"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = NESTED_DOCUMENTS.clone(); let 
(value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -1453,19 +1380,19 @@ async fn federation_sort_same_indexes_same_criterion_opposite_direction() { ] })) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); - // two identical placeholder search should have all results from first query + // two identical placeholder searches should have all results from the first query let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "nested", "q": "", "sort": ["mother:asc"], "showRankingScore": true }, - {"indexUid" : "nested", "q": "", "sort": ["mother:desc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "", "sort": ["mother:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `nested.rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. `queries[1].sort[0]`, `nested.rankingRules[0]`: descending sort rule(s) on field `mother`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", + "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `[uuid].rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. `queries[1].sort[0]`, `[uuid].rankingRules[0]`: descending sort rule(s) on field `mother`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. 
Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", "code": "invalid_multi_search_query_ranking_rules", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" @@ -1475,14 +1402,14 @@ async fn federation_sort_same_indexes_same_criterion_opposite_direction() { // mix and match query: should be ranked by ranking score let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "nested", "q": "pésti", "sort": ["mother:asc"], "showRankingScore": true }, - {"indexUid" : "nested", "q": "jean", "sort": ["mother:desc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "pésti", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "jean", "sort": ["mother:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `nested.rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. `queries[1].sort[0]`, `nested.rankingRules[0]`: descending sort rule(s) on field `mother`\n - cannot compare two sort rules in opposite directions\n", + "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `[uuid].rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. 
`queries[1].sort[0]`, `[uuid].rankingRules[0]`: descending sort rule(s) on field `mother`\n - cannot compare two sort rules in opposite directions\n", "code": "invalid_multi_search_query_ranking_rules", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" @@ -1492,13 +1419,12 @@ async fn federation_sort_same_indexes_same_criterion_opposite_direction() { #[actix_rt::test] async fn federation_sort_same_indexes_different_criterion_same_direction() { - let server = Server::new().await; - - let index = server.index("nested"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = NESTED_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -1513,17 +1439,17 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { ] })) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); - // return mothers and fathers ordered accross fields. + // return mothers and fathers ordered across fields. let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "nested", "q": "", "sort": ["mother:asc"], "showRankingScore": true }, - {"indexUid" : "nested", "q": "", "sort": ["father:asc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "", "sort": ["father:asc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { @@ -1542,7 +1468,7 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { ], "cattos": "pésti", "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -1567,7 +1493,7 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { "gomez" ], "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -1581,7 +1507,7 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { "enigma" ], "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -1602,14 +1528,14 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { "pestiféré" ], "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, "_rankingScore": 1.0 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 4 @@ -1619,13 +1545,13 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { // mix and match query: will be sorted across mother and father names let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "nested", "q": "pésti", "sort": ["mother:desc"], "showRankingScore": true }, - {"indexUid" : "nested", "q": "jean-bap", "sort": ["father:desc"], "showRankingScore": true }, - {"indexUid" : "nested", "q": "jea", "sort": ["father:desc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "pésti", "sort": 
["mother:desc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "jean-bap", "sort": ["father:desc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "jea", "sort": ["father:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { @@ -1643,7 +1569,7 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { "pestiféré" ], "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.7803030303030303 }, @@ -1665,7 +1591,7 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { ], "cattos": "pésti", "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -1690,14 +1616,14 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { "gomez" ], "_federation": { - "indexUid": "nested", + "indexUid": "[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9991181657848324 }, "_rankingScore": 0.9991181657848324 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 3 @@ -1707,13 +1633,12 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { #[actix_rt::test] async fn federation_sort_same_indexes_different_criterion_opposite_direction() { - let server = Server::new().await; - - let index = server.index("nested"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("nested"); let documents = NESTED_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -1728,19 +1653,19 @@ async fn federation_sort_same_indexes_different_criterion_opposite_direction() { ] })) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); - // two identical placeholder search should have all results from first query + // two identical placeholder searches should have all results from the first query let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "nested", "q": "", "sort": ["mother:asc"], "showRankingScore": true }, - {"indexUid" : "nested", "q": "", "sort": ["father:desc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "", "sort": ["father:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `nested.rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. `queries[1].sort[0]`, `nested.rankingRules[0]`: descending sort rule(s) on field `father`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. 
Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", + "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `nested-[uuid].rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. `queries[1].sort[0]`, `nested-[uuid].rankingRules[0]`: descending sort rule(s) on field `father`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. 
Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", "code": "invalid_multi_search_query_ranking_rules", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" @@ -1750,14 +1675,14 @@ async fn federation_sort_same_indexes_different_criterion_opposite_direction() { // mix and match query: should be ranked by ranking score let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "nested", "q": "pésti", "sort": ["mother:asc"], "showRankingScore": true }, - {"indexUid" : "nested", "q": "jean", "sort": ["father:desc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "pésti", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : index.uid, "q": "jean", "sort": ["father:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `nested.rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. `queries[1].sort[0]`, `nested.rankingRules[0]`: descending sort rule(s) on field `father`\n - cannot compare two sort rules in opposite directions\n", + "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `nested-[uuid].rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. `queries[1].sort[0]`, `nested-[uuid].rankingRules[0]`: descending sort rule(s) on field `father`\n - cannot compare two sort rules in opposite directions\n", "code": "invalid_multi_search_query_ranking_rules", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" @@ -1767,66 +1692,26 @@ async fn federation_sort_same_indexes_different_criterion_opposite_direction() { #[actix_rt::test] async fn federation_sort_different_indexes_same_criterion_same_direction() { - let server = Server::new().await; + let server = Server::new_shared(); + let movies_index = shared_movies_index().await; + let batman_index = shared_batman_index().await; - let index = server.index("movies"); - - let documents = DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - let index = server.index("batman"); - - let documents = SCORE_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - // return titles ordered accross indexes + // return titles ordered across indexes let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "showRankingScore": true }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "showRankingScore": true }, + 
{"indexUid" : movies_index.uid, "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "", "sort": ["title:asc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "title": "Badman", "id": "E", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -1836,7 +1721,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "title": "Batman", "id": "D", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -1846,7 +1731,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "title": "Batman Returns", "id": "C", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -1856,7 +1741,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "title": "Batman the dark knight returns: Part 1", "id": "A", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -1866,7 +1751,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "title": "Batman the dark knight returns: Part 2", "id": "B", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -1880,7 +1765,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "blue" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -1894,7 +1779,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "red" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -1908,7 +1793,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "red" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -1922,7 +1807,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "red" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -1936,14 +1821,14 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "blue" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, "_rankingScore": 1.0 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 10 @@ -1953,13 +1838,13 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { // mix and match query: will be sorted across indexes let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "batman", "q": "badman returns", "sort": ["title:desc"], "showRankingScore": true }, - {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, - {"indexUid" : "batman", "q": "the bat", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "badman returns", 
"sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : movies_index.uid, "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "the bat", "sort": ["title:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { @@ -1970,7 +1855,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "blue" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9848484848484848 }, @@ -1980,7 +1865,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "title": "Batman the dark knight returns: Part 2", "id": "B", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 2, "weightedRankingScore": 0.9528218694885362 }, @@ -1990,7 +1875,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "title": "Batman the dark knight returns: Part 1", "id": "A", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 2, "weightedRankingScore": 0.9528218694885362 }, @@ -2000,7 +1885,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "title": "Batman Returns", "id": "C", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.8317901234567902 }, @@ -2010,7 +1895,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "title": "Batman", "id": "D", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.23106060606060605 }, @@ -2020,14 +1905,14 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { "title": "Badman", "id": "E", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.5 }, "_rankingScore": 0.5 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 6 @@ -2037,66 +1922,28 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { #[actix_rt::test] async fn federation_sort_different_ranking_rules() { - let server = Server::new().await; + let server = Server::new_shared(); - let index = server.index("movies"); + let movies_index = shared_movies_index().await; - let documents = DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + let batman_index = shared_index_with_score_documents().await; - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - let index = server.index("batman"); - - let documents = SCORE_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "rankingRules": [ - "words", - "typo", - "proximity", - "attribute", - "sort", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - // return titles 
ordered accross indexes + // return titles ordered across indexes let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "showRankingScore": true }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : movies_index.uid, "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "", "sort": ["title:asc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "title": "Badman", "id": "E", "_federation": { - "indexUid": "batman", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -2106,7 +1953,7 @@ async fn federation_sort_different_ranking_rules() { "title": "Batman", "id": "D", "_federation": { - "indexUid": "batman", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -2116,7 +1963,7 @@ async fn federation_sort_different_ranking_rules() { "title": "Batman Returns", "id": "C", "_federation": { - "indexUid": "batman", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -2126,7 +1973,7 @@ async fn federation_sort_different_ranking_rules() { "title": "Batman the dark knight returns: Part 1", "id": "A", "_federation": { - "indexUid": "batman", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -2136,7 +1983,7 @@ async fn federation_sort_different_ranking_rules() { "title": "Batman the dark knight returns: Part 2", "id": "B", "_federation": { - "indexUid": "batman", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -2150,7 +1997,7 @@ async fn federation_sort_different_ranking_rules() { "blue" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -2164,7 +2011,7 @@ async fn federation_sort_different_ranking_rules() { "red" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -2178,7 +2025,7 @@ async fn federation_sort_different_ranking_rules() { "red" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -2192,7 +2039,7 @@ async fn federation_sort_different_ranking_rules() { "red" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -2206,14 +2053,14 @@ async fn federation_sort_different_ranking_rules() { "blue" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, "_rankingScore": 1.0 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 10 @@ -2223,15 +2070,15 @@ async fn federation_sort_different_ranking_rules() { // mix and match query: order difficult to understand let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "batman", "q": "badman returns", "sort": ["title:desc"], "showRankingScore": true }, - {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, - {"indexUid" : "batman", 
"q": "the bat", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "badman returns", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : movies_index.uid, "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "the bat", "sort": ["title:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: The results of queries #2 and #1 are incompatible: \n 1. `queries[2]`, `batman.rankingRules[0..=3]`: relevancy rule(s) words, typo, proximity, attribute\n 2. `queries[1].sort[0]`, `movies.rankingRules[0]`: descending sort rule(s) on field `title`\n - cannot compare a relevancy rule with a sort rule\n", + "message": "Inside `.queries[1]`: The results of queries #2 and #1 are incompatible: \n 1. `queries[2]`, `SHARED_SCORE_DOCUMENTS.rankingRules[0..=3]`: relevancy rule(s) words, typo, proximity, attribute\n 2. `queries[1].sort[0]`, `movies-[uuid].rankingRules[0]`: descending sort rule(s) on field `title`\n - cannot compare a relevancy rule with a sort rule\n", "code": "invalid_multi_search_query_ranking_rules", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" @@ -2241,61 +2088,21 @@ async fn federation_sort_different_ranking_rules() { #[actix_rt::test] async fn federation_sort_different_indexes_same_criterion_opposite_direction() { - let server = Server::new().await; - - let index = server.index("movies"); - - let documents = DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - let index = server.index("batman"); - - let documents = SCORE_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); + let server = Server::new_shared(); + let movies_index = shared_movies_index().await; + let batman_index = shared_batman_index().await; // all results from query 0 let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "showRankingScore": true }, - {"indexUid" : "batman", "q": "", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : movies_index.uid, "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "", "sort": ["title:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[0]`: The results of queries #1 and #0 are incompatible: \n 1. `queries[1].sort[0]`, `batman.rankingRules[0]`: descending sort rule(s) on field `title`\n 2. 
`queries[0].sort[0]`, `movies.rankingRules[0]`: ascending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", + "message": "Inside `.queries[0]`: The results of queries #1 and #0 are incompatible: \n 1. `queries[1].sort[0]`, `batman-[uuid].rankingRules[0]`: descending sort rule(s) on field `title`\n 2. `queries[0].sort[0]`, `movies-[uuid].rankingRules[0]`: ascending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. 
Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", "code": "invalid_multi_search_query_ranking_rules", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" @@ -2305,15 +2112,15 @@ async fn federation_sort_different_indexes_same_criterion_opposite_direction() { // mix and match query: will be sorted by ranking score let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "batman", "q": "badman returns", "sort": ["title:asc"], "showRankingScore": true }, - {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, - {"indexUid" : "batman", "q": "the bat", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "badman returns", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : movies_index.uid, "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "the bat", "sort": ["title:asc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: The results of queries #2 and #1 are incompatible: \n 1. `queries[2].sort[0]`, `batman.rankingRules[0]`: ascending sort rule(s) on field `title`\n 2. `queries[1].sort[0]`, `movies.rankingRules[0]`: descending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n", + "message": "Inside `.queries[1]`: The results of queries #2 and #1 are incompatible: \n 1. `queries[2].sort[0]`, `batman-[uuid].rankingRules[0]`: ascending sort rule(s) on field `title`\n 2. 
`queries[1].sort[0]`, `movies-[uuid].rankingRules[0]`: descending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n", "code": "invalid_multi_search_query_ranking_rules", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" @@ -2323,66 +2130,26 @@ async fn federation_sort_different_indexes_same_criterion_opposite_direction() { #[actix_rt::test] async fn federation_sort_different_indexes_different_criterion_same_direction() { - let server = Server::new().await; + let server = Server::new_shared(); + let movies_index = shared_movies_index().await; + let batman_index = shared_batman_index().await; - let index = server.index("movies"); - - let documents = DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - let index = server.index("batman"); - - let documents = SCORE_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["id"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - // return titles ordered accross indexes + // return titles ordered across indexes let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "showRankingScore": true }, - {"indexUid" : "batman", "q": "", "sort": ["id:asc"], "showRankingScore": true }, + {"indexUid" : movies_index.uid, "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "", "sort": ["id:asc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "title": "Batman the dark knight returns: Part 1", "id": "A", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -2392,7 +2159,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "title": "Batman the dark knight returns: Part 2", "id": "B", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -2402,7 +2169,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "title": "Batman Returns", "id": "C", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -2416,7 +2183,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "blue" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -2426,7 +2193,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "title": "Batman", "id": "D", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, 
"weightedRankingScore": 1.0 }, @@ -2436,7 +2203,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "title": "Badman", "id": "E", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 }, @@ -2450,7 +2217,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "red" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -2464,7 +2231,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "red" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -2478,7 +2245,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "red" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -2492,14 +2259,14 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "blue" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 }, "_rankingScore": 1.0 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 10 @@ -2509,20 +2276,20 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() // mix and match query: will be sorted across indexes and criterion let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "batman", "q": "badman returns", "sort": ["id:desc"], "showRankingScore": true }, - {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, - {"indexUid" : "batman", "q": "the bat", "sort": ["id:desc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "badman returns", "sort": ["id:desc"], "showRankingScore": true }, + {"indexUid" : movies_index.uid, "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "the bat", "sort": ["id:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "title": "Badman", "id": "E", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.5 }, @@ -2532,7 +2299,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "title": "Batman", "id": "D", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.23106060606060605 }, @@ -2546,7 +2313,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "blue" ], "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9848484848484848 }, @@ -2556,7 +2323,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "title": "Batman Returns", "id": "C", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.8317901234567902 }, @@ -2566,7 +2333,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "title": "Batman the dark knight returns: 
Part 2", "id": "B", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 2, "weightedRankingScore": 0.9528218694885362 }, @@ -2576,14 +2343,14 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() "title": "Batman the dark knight returns: Part 1", "id": "A", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 2, "weightedRankingScore": 0.9528218694885362 }, "_rankingScore": 0.9528218694885362 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 6 @@ -2593,61 +2360,21 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() #[actix_rt::test] async fn federation_sort_different_indexes_different_criterion_opposite_direction() { - let server = Server::new().await; - - let index = server.index("movies"); - - let documents = DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - let index = server.index("batman"); - - let documents = SCORE_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["id"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); + let server = Server::new_shared(); + let movies_index = shared_movies_index().await; + let batman_index = shared_batman_index().await; // all results from query 0 first let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "showRankingScore": true }, - {"indexUid" : "batman", "q": "", "sort": ["id:desc"], "showRankingScore": true }, + {"indexUid" : movies_index.uid, "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "", "sort": ["id:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[0]`: The results of queries #1 and #0 are incompatible: \n 1. `queries[1].sort[0]`, `batman.rankingRules[0]`: descending sort rule(s) on field `id`\n 2. `queries[0].sort[0]`, `movies.rankingRules[0]`: ascending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. 
Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", + "message": "Inside `.queries[0]`: The results of queries #1 and #0 are incompatible: \n 1. `queries[1].sort[0]`, `batman-[uuid].rankingRules[0]`: descending sort rule(s) on field `id`\n 2. `queries[0].sort[0]`, `movies-[uuid].rankingRules[0]`: ascending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. 
Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", "code": "invalid_multi_search_query_ranking_rules", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" @@ -2657,15 +2384,15 @@ async fn federation_sort_different_indexes_different_criterion_opposite_directio // mix and match query: more or less by ranking score let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "batman", "q": "badman returns", "sort": ["id:desc"], "showRankingScore": true }, - {"indexUid" : "movies", "q": "captain", "sort": ["title:asc"], "showRankingScore": true }, - {"indexUid" : "batman", "q": "the bat", "sort": ["id:desc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "badman returns", "sort": ["id:desc"], "showRankingScore": true }, + {"indexUid" : movies_index.uid, "q": "captain", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : batman_index.uid, "q": "the bat", "sort": ["id:desc"], "showRankingScore": true }, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: The results of queries #2 and #1 are incompatible: \n 1. `queries[2].sort[0]`, `batman.rankingRules[0]`: descending sort rule(s) on field `id`\n 2. `queries[1].sort[0]`, `movies.rankingRules[0]`: ascending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n", + "message": "Inside `.queries[1]`: The results of queries #2 and #1 are incompatible: \n 1. `queries[2].sort[0]`, `batman-[uuid].rankingRules[0]`: descending sort rule(s) on field `id`\n 2. 
`queries[1].sort[0]`, `movies-[uuid].rankingRules[0]`: ascending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n", "code": "invalid_multi_search_query_ranking_rules", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" @@ -2675,46 +2402,35 @@ async fn federation_sort_different_indexes_different_criterion_opposite_directio #[actix_rt::test] async fn federation_limit_offset() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; + let nested_index = shared_index_with_nested_documents().await; + let score_index = shared_index_with_score_documents().await; - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("score"); - let documents = SCORE_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); { let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, - {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, - {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, - {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, - {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, - {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : index.uid, "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid" : score_index.uid, "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman returns", "attributesToRetrieve": ["title"]}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { "title": "Gläss", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", 
"queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -2722,7 +2438,7 @@ async fn federation_limit_offset() { { "id": 852, "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -2730,7 +2446,7 @@ async fn federation_limit_offset() { { "title": "Batman", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 1.0 } @@ -2738,7 +2454,7 @@ async fn federation_limit_offset() { { "title": "Batman Returns", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 10, "weightedRankingScore": 1.0 } @@ -2746,7 +2462,7 @@ async fn federation_limit_offset() { { "title": "Captain Marvel", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 0.9848484848484848 } @@ -2754,7 +2470,7 @@ async fn federation_limit_offset() { { "title": "Escape Room", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 3, "weightedRankingScore": 0.9848484848484848 } @@ -2762,7 +2478,7 @@ async fn federation_limit_offset() { { "id": 951, "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 4, "weightedRankingScore": 0.9848484848484848 } @@ -2770,7 +2486,7 @@ async fn federation_limit_offset() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 0.9848484848484848 } @@ -2778,7 +2494,7 @@ async fn federation_limit_offset() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 0.9848484848484848 } @@ -2786,7 +2502,7 @@ async fn federation_limit_offset() { { "id": 654, "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 2, "weightedRankingScore": 0.7803030303030303 } @@ -2794,7 +2510,7 @@ async fn federation_limit_offset() { { "title": "Badman", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 8, "weightedRankingScore": 0.5 } @@ -2802,13 +2518,13 @@ async fn federation_limit_offset() { { "title": "How to Train Your Dragon: The Hidden World", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 6, "weightedRankingScore": 0.4166666666666667 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 12 @@ -2819,33 +2535,33 @@ async fn federation_limit_offset() { { let (response, code) = server .multi_search(json!({"federation": {"limit": 1}, "queries": [ - {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, - {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, - {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, - {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, - {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, - {"indexUid" : "score", 
"q": "batman", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : index.uid, "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid" : score_index.uid, "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman returns", "attributesToRetrieve": ["title"]}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { "title": "Gläss", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 0, "weightedRankingScore": 1.0 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 1, "offset": 0, "estimatedTotalHits": 12 @@ -2856,27 +2572,27 @@ async fn federation_limit_offset() { { let (response, code) = server .multi_search(json!({"federation": {"offset": 2}, "queries": [ - {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, - {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, - {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, - {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, - {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, - {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : index.uid, "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid" : score_index.uid, "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman returns", "attributesToRetrieve": ["title"]}, ]})) .await; snapshot!(code, @"200 OK"); - 
insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { "title": "Batman", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 1.0 } @@ -2884,7 +2600,7 @@ async fn federation_limit_offset() { { "title": "Batman Returns", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 10, "weightedRankingScore": 1.0 } @@ -2892,7 +2608,7 @@ async fn federation_limit_offset() { { "title": "Captain Marvel", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 0.9848484848484848 } @@ -2900,7 +2616,7 @@ async fn federation_limit_offset() { { "title": "Escape Room", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 3, "weightedRankingScore": 0.9848484848484848 } @@ -2908,7 +2624,7 @@ async fn federation_limit_offset() { { "id": 951, "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 4, "weightedRankingScore": 0.9848484848484848 } @@ -2916,7 +2632,7 @@ async fn federation_limit_offset() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 0.9848484848484848 } @@ -2924,7 +2640,7 @@ async fn federation_limit_offset() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 0.9848484848484848 } @@ -2932,7 +2648,7 @@ async fn federation_limit_offset() { { "id": 654, "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 2, "weightedRankingScore": 0.7803030303030303 } @@ -2940,7 +2656,7 @@ async fn federation_limit_offset() { { "title": "Badman", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 8, "weightedRankingScore": 0.5 } @@ -2948,13 +2664,13 @@ async fn federation_limit_offset() { { "title": "How to Train Your Dragon: The Hidden World", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 6, "weightedRankingScore": 0.4166666666666667 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 2, "estimatedTotalHits": 12 @@ -2965,24 +2681,24 @@ async fn federation_limit_offset() { { let (response, code) = server .multi_search(json!({"federation": {"offset": 12}, "queries": [ - {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, - {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, - {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, - {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, - {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, - {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, - {"indexUid": 
"score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : index.uid, "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid" : score_index.uid, "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman returns", "attributesToRetrieve": ["title"]}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 12, "estimatedTotalHits": 12 @@ -2993,46 +2709,35 @@ async fn federation_limit_offset() { #[actix_rt::test] async fn federation_formatting() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = shared_index_with_documents().await; + let nested_index = shared_index_with_nested_documents().await; + let score_index = shared_index_with_score_documents().await; - let documents = DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("nested"); - let documents = NESTED_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); - - let index = server.index("score"); - let documents = SCORE_DOCUMENTS.clone(); - let (task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(task.uid()).await.succeeded(); { let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, - {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, - {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, - {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, - {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, - {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, - {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, - {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, - {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, - {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, - {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": 
["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : index.uid, "q": "glass", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : index.uid, "q": "captain", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : nested_index.uid, "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : index.uid, "q": "Escape", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : nested_index.uid, "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid" : score_index.uid, "q": "jean", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : index.uid, "q": "the bat", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : score_index.uid, "q": "the bat", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : score_index.uid, "q": "badman returns", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman returns", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { "title": "Gläss", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 0, "weightedRankingScore": 1.0 }, @@ -3043,7 +2748,7 @@ async fn federation_formatting() { { "id": 852, "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -3051,7 +2756,7 @@ async fn federation_formatting() { { "title": "Batman", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 1.0 }, @@ -3062,7 +2767,7 @@ async fn federation_formatting() { { "title": "Batman Returns", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 10, "weightedRankingScore": 1.0 }, @@ -3073,7 +2778,7 @@ async fn federation_formatting() { { "title": "Captain Marvel", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 0.9848484848484848 }, @@ -3084,7 +2789,7 @@ async fn federation_formatting() { { "title": "Escape Room", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 3, "weightedRankingScore": 0.9848484848484848 }, @@ -3095,7 +2800,7 @@ async fn federation_formatting() { { "id": 951, "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 4, "weightedRankingScore": 0.9848484848484848 } @@ -3103,7 +2808,7 @@ async fn federation_formatting() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 0.9848484848484848 }, @@ -3114,7 +2819,7 @@ async fn federation_formatting() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 0.9848484848484848 }, @@ -3125,7 
+2830,7 @@ async fn federation_formatting() { { "id": 654, "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 2, "weightedRankingScore": 0.7803030303030303 } @@ -3133,7 +2838,7 @@ async fn federation_formatting() { { "title": "Badman", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 8, "weightedRankingScore": 0.5 }, @@ -3144,7 +2849,7 @@ async fn federation_formatting() { { "title": "How to Train Your Dragon: The Hidden World", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 6, "weightedRankingScore": 0.4166666666666667 }, @@ -3153,7 +2858,7 @@ async fn federation_formatting() { } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 12 @@ -3164,33 +2869,33 @@ async fn federation_formatting() { { let (response, code) = server .multi_search(json!({"federation": {"limit": 1}, "queries": [ - {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, - {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, - {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, - {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, - {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, - {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : index.uid, "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid" : score_index.uid, "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman returns", "attributesToRetrieve": ["title"]}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { "title": "Gläss", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 0, "weightedRankingScore": 1.0 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 1, "offset": 0, "estimatedTotalHits": 12 @@ -3201,27 +2906,27 @@ async fn federation_formatting() { { let (response, code) = server .multi_search(json!({"federation": {"offset": 2}, "queries": [ - {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, - {"indexUid" : 
"test", "q": "captain", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, - {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, - {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, - {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, - {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : index.uid, "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid" : score_index.uid, "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman returns", "attributesToRetrieve": ["title"]}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { "title": "Batman", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 1.0 } @@ -3229,7 +2934,7 @@ async fn federation_formatting() { { "title": "Batman Returns", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 10, "weightedRankingScore": 1.0 } @@ -3237,7 +2942,7 @@ async fn federation_formatting() { { "title": "Captain Marvel", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 1, "weightedRankingScore": 0.9848484848484848 } @@ -3245,7 +2950,7 @@ async fn federation_formatting() { { "title": "Escape Room", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 3, "weightedRankingScore": 0.9848484848484848 } @@ -3253,7 +2958,7 @@ async fn federation_formatting() { { "id": 951, "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 4, "weightedRankingScore": 0.9848484848484848 } @@ -3261,7 +2966,7 @@ async fn federation_formatting() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 0.9848484848484848 } @@ -3269,7 +2974,7 @@ async fn federation_formatting() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 9, "weightedRankingScore": 0.9848484848484848 } @@ -3277,7 +2982,7 @@ async fn federation_formatting() { 
{ "id": 654, "_federation": { - "indexUid": "nested", + "indexUid": "SHARED_NESTED_DOCUMENTS", "queriesPosition": 2, "weightedRankingScore": 0.7803030303030303 } @@ -3285,7 +2990,7 @@ async fn federation_formatting() { { "title": "Badman", "_federation": { - "indexUid": "score", + "indexUid": "SHARED_SCORE_DOCUMENTS", "queriesPosition": 8, "weightedRankingScore": 0.5 } @@ -3293,13 +2998,13 @@ async fn federation_formatting() { { "title": "How to Train Your Dragon: The Hidden World", "_federation": { - "indexUid": "test", + "indexUid": "SHARED_DOCUMENTS", "queriesPosition": 6, "weightedRankingScore": 0.4166666666666667 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 2, "estimatedTotalHits": 12 @@ -3310,24 +3015,24 @@ async fn federation_formatting() { { let (response, code) = server .multi_search(json!({"federation": {"offset": 12}, "queries": [ - {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, - {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, - {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, - {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, - {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, - {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, - {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, - {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : index.uid, "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid" : nested_index.uid, "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid" : score_index.uid, "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid" : index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid" : score_index.uid, "q": "batman returns", "attributesToRetrieve": ["title"]}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 12, "estimatedTotalHits": 12 @@ -3338,29 +3043,28 @@ async fn federation_formatting() { #[actix_rt::test] async fn federation_invalid_weight() { - let server = Server::new().await; - - let index = server.index("fruits"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (value, _) = index 
.update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), ) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "fruits", "q": "apple red", "filter": "BOOST = true", "showRankingScore": true, "federationOptions": {"weight": 3.0}}, - {"indexUid": "fruits", "q": "apple red", "showRankingScore": true, "federationOptions": {"weight": -12}}, + {"indexUid" : index.uid, "q": "apple red", "filter": "BOOST = true", "showRankingScore": true, "federationOptions": {"weight": 3.0}}, + {"indexUid": index.uid, "q": "apple red", "showRankingScore": true, "federationOptions": {"weight": -12}}, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(response, @r###" { "message": "Invalid value at `.queries[1].federationOptions.weight`: the value of `weight` is invalid, expected a positive float (>= 0.0).", "code": "invalid_multi_search_weight", @@ -3372,29 +3076,29 @@ async fn federation_invalid_weight() { #[actix_rt::test] async fn federation_null_weight() { - let server = Server::new().await; + let server = Server::new_shared(); - let index = server.index("fruits"); + let index = server.unique_index_with_prefix("fruits"); let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), ) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "fruits", "q": "apple red", "filter": "BOOST = true", "showRankingScore": true, "federationOptions": {"weight": 3.0}}, - {"indexUid": "fruits", "q": "apple red", "showRankingScore": true, "federationOptions": {"weight": 0.0} }, + {"indexUid" : index.uid, "q": "apple red", "filter": "BOOST = true", "showRankingScore": true, "federationOptions": {"weight": 3.0}}, + {"indexUid": index.uid, "q": "apple red", "showRankingScore": true, "federationOptions": {"weight": 0.0} }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { @@ -3402,7 +3106,7 @@ async fn federation_null_weight() { "id": "red-delicious-boosted", "BOOST": true, "_federation": { - "indexUid": "fruits", + "indexUid": "fruits-[uuid]", "queriesPosition": 0, "weightedRankingScore": 2.7281746031746033 }, @@ -3413,7 +3117,7 @@ async fn federation_null_weight() { "id": "green-apple-boosted", "BOOST": true, "_federation": { - "indexUid": "fruits", + "indexUid": "fruits-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.318181818181818 }, @@ -3423,14 +3127,14 @@ async fn federation_null_weight() { "name": "Red apple gala", "id": "red-apple-gala", "_federation": { - "indexUid": "fruits", + "indexUid": "fruits-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.0 }, "_rankingScore": 0.953042328042328 } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 3 @@ -3440,23 +3144,23 @@ async fn 
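// A minimal sketch of the migration pattern these hunks apply throughout the file,
// assuming the test-support helpers shown above (`Server::new_shared`,
// `unique_index_with_prefix`, `server.wait_task`, `json_string!`) keep the signatures
// this diff uses. Tests share one server, isolate state behind a uniquely suffixed
// index uid, and redact volatile response fields before snapshotting. The test name
// and the elided snapshot body are hypothetical.
#[actix_rt::test]
async fn example_shared_server_test() {
    let server = Server::new_shared();
    let index = server.unique_index_with_prefix("fruits");

    let documents = FRUITS_DOCUMENTS.clone();
    let (value, _) = index.add_documents(documents, None).await;
    // Tasks are awaited through the shared server, not the index handle.
    server.wait_task(value.uid()).await.succeeded();

    let (response, code) = server
        .multi_search(json!({"federation": {}, "queries": [
            {"indexUid": index.uid, "q": "apple red"},
        ]}))
        .await;
    snapshot!(code, @"200 OK");
    // Timings become "[duration]"; the random uid suffix shows up as "fruits-[uuid]".
    snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###"…"###);
}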
federation_null_weight() { #[actix_rt::test] async fn federation_federated_contains_pagination() { - let server = Server::new().await; + let server = Server::new_shared(); - let index = server.index("fruits"); + let index = server.unique_index_with_prefix("fruits"); let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); // fail when a federated query contains "limit" let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits", "q": "apple red", "limit": 5}, + {"indexUid" : index.uid, "q": "apple red"}, + {"indexUid": index.uid, "q": "apple red", "limit": 5}, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(response, @r###" { "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `limit` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", @@ -3467,12 +3171,12 @@ async fn federation_federated_contains_pagination() { // fail when a federated query contains "offset" let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits", "q": "apple red", "offset": 5}, + {"indexUid" : index.uid, "q": "apple red"}, + {"indexUid": index.uid, "q": "apple red", "offset": 5}, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(response, @r###" { "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `offset` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", @@ -3483,12 +3187,12 @@ async fn federation_federated_contains_pagination() { // fail when a federated query contains "page" let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits", "q": "apple red", "page": 2}, + {"indexUid" : index.uid, "q": "apple red"}, + {"indexUid": index.uid, "q": "apple red", "page": 2}, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(response, @r###" { "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `page` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", @@ -3499,12 +3203,12 @@ async fn federation_federated_contains_pagination() { // fail when a federated query contains "hitsPerPage" let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits", "q": "apple red", "hitsPerPage": 5}, + {"indexUid" : index.uid, "q": "apple red"}, + {"indexUid": index.uid, "q": "apple red", "hitsPerPage": 5}, ]})) .await; snapshot!(code, @"400 Bad 
Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(response, @r###" { "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `hitsPerPage` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", @@ -3516,9 +3220,9 @@ async fn federation_federated_contains_pagination() { #[actix_rt::test] async fn federation_federated_contains_facets() { - let server = Server::new().await; + let server = Server::new_shared(); - let index = server.index("fruits"); + let index = server.unique_index_with_prefix("fruits"); let (value, _) = index .update_settings( @@ -3526,28 +3230,28 @@ async fn federation_federated_contains_facets() { ) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); // empty facets are actually OK let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits", "q": "apple red", "facets": []}, + {"indexUid" : index.uid, "q": "apple red"}, + {"indexUid": index.uid, "q": "apple red", "facets": []}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "name": "Red apple gala", "id": "red-apple-gala", "_federation": { - "indexUid": "fruits", + "indexUid": "fruits-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.953042328042328 } @@ -3557,7 +3261,7 @@ async fn federation_federated_contains_facets() { "id": "red-delicious-boosted", "BOOST": true, "_federation": { - "indexUid": "fruits", + "indexUid": "fruits-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9093915343915344 } @@ -3567,13 +3271,13 @@ async fn federation_federated_contains_facets() { "id": "green-apple-boosted", "BOOST": true, "_federation": { - "indexUid": "fruits", + "indexUid": "fruits-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.4393939393939394 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 3 @@ -3583,14 +3287,14 @@ async fn federation_federated_contains_facets() { // fails let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits", "q": "apple red", "facets": ["BOOSTED"]}, + {"indexUid": index.uid, "q": "apple red"}, + {"indexUid": index.uid, "q": "apple red", "facets": ["BOOSTED"]}, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #1 or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.fruits: [\"BOOSTED\"]` for facets in federated search", + "message": "Inside `.queries[1]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #1 or remove 
`federation` from the request\n - Hint: pass `federation.facetsByIndex.fruits-[uuid]: [\"BOOSTED\"]` for facets in federated search", "code": "invalid_multi_search_query_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_facets" @@ -3600,54 +3304,55 @@ async fn federation_federated_contains_facets() { #[actix_rt::test] async fn federation_non_faceted_for_an_index() { - let server = Server::new().await; + let server = Server::new_shared(); - let index = server.index("fruits"); + let fruits_index = server.unique_index_with_prefix("fruits"); - let (value, _) = index + let (value, _) = fruits_index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id", "name"]}), ) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); - let index = server.index("fruits-no-name"); + let fruits_no_name_index = server.unique_index_with_prefix("fruits-no-name"); - let (value, _) = index + let (value, _) = fruits_no_name_index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id"]}), ) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); - let index = server.index("fruits-no-facets"); + let fruits_no_facets_index = server.unique_index_with_prefix("fruits-no-facets"); - let (value, _) = index.update_settings(json!({"searchableAttributes": ["name"]})).await; + let (value, _) = + fruits_no_facets_index.update_settings(json!({"searchableAttributes": ["name"]})).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let documents = FRUITS_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + let (value, _) = fruits_no_facets_index.add_documents(documents, None).await; + server.wait_task(value.uid()).await.succeeded(); // fails let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "fruits": ["BOOST", "id", "name"], - "fruits-no-name": ["BOOST", "id", "name"], + fruits_index.uid.clone(): ["BOOST", "id", "name"], + fruits_no_name_index.uid.clone(): ["BOOST", "id", "name"], } }, "queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits-no-name", "q": "apple red"}, + {"indexUid" : fruits_index.uid.clone(), "q": "apple red"}, + {"indexUid": fruits_no_name_index.uid.clone(), "q": "apple red"}, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution: Attribute `name` is not filterable. Available filterable attributes patterns are: `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`", + "message": "Inside `.federation.facetsByIndex.fruits-no-name-[uuid]`: Invalid facet distribution: Attribute `name` is not filterable. 
Available filterable attributes patterns are: `BOOST, id`.\n - Note: index `fruits-no-name-[uuid]` used in `.queries[1]`", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" @@ -3658,18 +3363,18 @@ async fn federation_non_faceted_for_an_index() { let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "fruits": ["BOOST", "id", "name"], - "fruits-no-name": ["BOOST", "id", "name"], + fruits_index.uid.clone(): ["BOOST", "id", "name"], + fruits_no_name_index.uid.clone(): ["BOOST", "id", "name"], } }, "queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits", "q": "apple red"}, + {"indexUid" : fruits_index.uid.clone(), "q": "apple red"}, + {"indexUid": fruits_index.uid.clone(), "q": "apple red"}, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution: Attribute `name` is not filterable. Available filterable attributes patterns are: `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries", + "message": "Inside `.federation.facetsByIndex.fruits-no-name-[uuid]`: Invalid facet distribution: Attribute `name` is not filterable. Available filterable attributes patterns are: `BOOST, id`.\n - Note: index `fruits-no-name-[uuid]` is not used in queries", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" @@ -3680,19 +3385,19 @@ async fn federation_non_faceted_for_an_index() { let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "fruits": ["BOOST", "id", "name"], - "fruits-no-name": ["BOOST", "id"], - "fruits-no-facets": ["BOOST", "id"], + fruits_index.uid.clone(): ["BOOST", "id", "name"], + fruits_no_name_index.uid.clone(): ["BOOST", "id"], + fruits_no_facets_index.uid.clone(): ["BOOST", "id"], } }, "queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits", "q": "apple red"}, + {"indexUid" : fruits_index.uid.clone(), "q": "apple red"}, + {"indexUid": fruits_index.uid.clone(), "q": "apple red"}, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r#" + snapshot!(json_string!(response), @r#" { - "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution: Attributes `BOOST, id` are not filterable. This index does not have configured filterable attributes.\n - Note: index `fruits-no-facets` is not used in queries", + "message": "Inside `.federation.facetsByIndex.fruits-no-facets-[uuid]`: Invalid facet distribution: Attributes `BOOST, id` are not filterable. 
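// A hedged aside on the `facetsByIndex` hunks around this error message: the facet
// map keys are now built from the runtime uids of the unique indexes. This relies on
// the crate's `json!` macro accepting expressions as object keys, which is an
// assumption read off the `fruits_index.uid.clone():` syntax used above.
let body = json!({"federation": {
    "facetsByIndex": {
        fruits_index.uid.clone(): ["BOOST", "id", "name"],
        fruits_no_name_index.uid.clone(): ["BOOST", "id"],
    }
}, "queries": [
    {"indexUid": fruits_index.uid.clone(), "q": "apple red"},
]});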
This index does not have configured filterable attributes.\n - Note: index `fruits-no-facets-[uuid]` is not used in queries", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" @@ -3704,15 +3409,15 @@ async fn federation_non_faceted_for_an_index() { .multi_search(json!({"federation": { "facetsByIndex": { "zorglub": ["BOOST", "id", "name"], - "fruits": ["BOOST", "id", "name"], + fruits_index.uid.clone(): ["BOOST", "id", "name"], } }, "queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits", "q": "apple red"}, + {"indexUid" : fruits_index.uid.clone(), "q": "apple red"}, + {"indexUid": fruits_index.uid.clone(), "q": "apple red"}, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(response, @r###" { "message": "Inside `.federation.facetsByIndex.zorglub`: Index `zorglub` not found.\n - Note: index `zorglub` is not used in queries", "code": "index_not_found", @@ -3724,23 +3429,23 @@ async fn federation_non_faceted_for_an_index() { #[actix_rt::test] async fn federation_non_federated_contains_federation_option() { - let server = Server::new().await; + let server = Server::new_shared(); - let index = server.index("fruits"); + let index = server.unique_index(); let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); // fail when a non-federated query contains "federationOptions" let (response, code) = server .multi_search(json!({"queries": [ - {"indexUid" : "fruits", "q": "apple red"}, - {"indexUid": "fruits", "q": "apple red", "federationOptions": {}}, + {"indexUid" : index.uid.clone(), "q": "apple red"}, + {"indexUid": index.uid.clone(), "q": "apple red", "federationOptions": {}}, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(response, @r###" { "message": "Inside `.queries[1]`: Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #1 or add `federation` to the request.", "code": "invalid_multi_search_federation_options", @@ -3752,9 +3457,9 @@ async fn federation_non_federated_contains_federation_option() { #[actix_rt::test] async fn federation_vector_single_index() { - let server = Server::new().await; + let server = Server::new_shared(); - let index = server.index("vectors"); + let index = server.unique_index(); let (value, _) = index .update_settings(json!({"embedders": { @@ -3768,29 +3473,29 @@ async fn federation_vector_single_index() { } }})) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let documents = VECTOR_DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); // same embedder let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "vectors", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}}, - {"indexUid": "vectors", "vector": [0.5, 0.5, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}}, + {"indexUid" : index.uid.clone(), "vector": [1.0, 0.0, 0.5], "hybrid": 
{"semanticRatio": 1.0, "embedder": "animal"}}, + {"indexUid": index.uid.clone(), "vector": [0.5, 0.5, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { "id": "B", "description": "the kitten scratched the beagle", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9870882034301758 } @@ -3799,7 +3504,7 @@ async fn federation_vector_single_index() { "id": "D", "description": "the little boy pets the puppy", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9728479385375975 } @@ -3808,7 +3513,7 @@ async fn federation_vector_single_index() { "id": "C", "description": "the dog had to stay alone today", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9701486229896544 } @@ -3817,13 +3522,13 @@ async fn federation_vector_single_index() { "id": "A", "description": "the dog barks at the cat", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9191691875457764 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 4, @@ -3834,20 +3539,20 @@ async fn federation_vector_single_index() { // distinct embedder let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "vectors", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}}, + {"indexUid" : index.uid.clone(), "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}}, // joyful and energetic first - {"indexUid": "vectors", "vector": [0.8, 0.6], "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}}, + {"indexUid": index.uid.clone(), "vector": [0.8, 0.6], "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { "id": "D", "description": "the little boy pets the puppy", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.979868710041046 } @@ -3856,7 +3561,7 @@ async fn federation_vector_single_index() { "id": "C", "description": "the dog had to stay alone today", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9701486229896544 } @@ -3865,7 +3570,7 @@ async fn federation_vector_single_index() { "id": "B", "description": "the kitten scratched the beagle", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.8601469993591309 } @@ -3874,13 +3579,13 @@ async fn federation_vector_single_index() { "id": "A", "description": "the dog barks at the cat", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.8432406187057495 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, 
"offset": 0, "estimatedTotalHits": 4, @@ -3891,21 +3596,21 @@ async fn federation_vector_single_index() { // hybrid search, distinct embedder let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "vectors", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "showRankingScore": true}, + {"indexUid" : index.uid, "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "showRankingScore": true}, // joyful and energetic first - {"indexUid": "vectors", "vector": [0.8, 0.6], "q": "beagle", "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"},"showRankingScore": true}, - {"indexUid": "vectors", "q": "dog", "showRankingScore": true}, + {"indexUid": index.uid, "vector": [0.8, 0.6], "q": "beagle", "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"},"showRankingScore": true}, + {"indexUid": index.uid, "q": "dog", "showRankingScore": true}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { "id": "D", "description": "the little boy pets the puppy", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.979868710041046 }, @@ -3915,7 +3620,7 @@ async fn federation_vector_single_index() { "id": "C", "description": "the dog had to stay alone today", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9701486229896544 }, @@ -3925,7 +3630,7 @@ async fn federation_vector_single_index() { "id": "A", "description": "the dog barks at the cat", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 2, "weightedRankingScore": 0.9242424242424242 }, @@ -3935,14 +3640,14 @@ async fn federation_vector_single_index() { "id": "B", "description": "the kitten scratched the beagle", "_federation": { - "indexUid": "vectors", + "indexUid": "[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.8601469993591309 }, "_rankingScore": "[score]" } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 4, @@ -3953,11 +3658,11 @@ async fn federation_vector_single_index() { #[actix_rt::test] async fn federation_vector_two_indexes() { - let server = Server::new().await; + let server = Server::new_shared(); - let index = server.index("vectors-animal"); + let vectors_animal_index = server.unique_index_with_prefix("vectors-animal"); - let (value, _) = index + let (value, _) = vectors_animal_index .update_settings(json!({"embedders": { "animal": { "source": "userProvided", @@ -3965,16 +3670,16 @@ async fn federation_vector_two_indexes() { }, }})) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let documents = VECTOR_DOCUMENTS.clone(); - let (value, code) = index.add_documents(documents, None).await; + let (value, code) = vectors_animal_index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); - let index = server.index("vectors-sentiment"); + let vectors_sentiment_index = server.unique_index_with_prefix("vectors-sentiment"); - let (value, _) = index + let (value, _) = vectors_sentiment_index 
.update_settings(json!({"embedders": { "sentiment": { "source": "userProvided", @@ -3982,23 +3687,23 @@ async fn federation_vector_two_indexes() { } }})) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let documents = VECTOR_DOCUMENTS.clone(); - let (value, code) = index.add_documents(documents, None).await; + let (value, code) = vectors_sentiment_index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "retrieveVectors": true}, + {"indexUid" : vectors_animal_index.uid, "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "retrieveVectors": true}, // joyful and energetic first - {"indexUid": "vectors-sentiment", "vector": [0.8, 0.6], "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "retrieveVectors": true}, - {"indexUid": "vectors-sentiment", "q": "dog", "retrieveVectors": true}, + {"indexUid": vectors_sentiment_index.uid, "vector": [0.8, 0.6], "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "retrieveVectors": true}, + {"indexUid": vectors_sentiment_index.uid, "q": "dog", "retrieveVectors": true}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { @@ -4021,7 +3726,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-sentiment", + "indexUid": "vectors-sentiment-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.979868710041046 } @@ -4046,7 +3751,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-animal", + "indexUid": "vectors-animal-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9728479385375975 } @@ -4071,7 +3776,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-animal", + "indexUid": "vectors-animal-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9701486229896544 } @@ -4096,7 +3801,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-sentiment", + "indexUid": "vectors-sentiment-[uuid]", "queriesPosition": 2, "weightedRankingScore": 0.9242424242424242 } @@ -4121,7 +3826,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-sentiment", + "indexUid": "vectors-sentiment-[uuid]", "queriesPosition": 2, "weightedRankingScore": 0.9242424242424242 } @@ -4146,7 +3851,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-animal", + "indexUid": "vectors-animal-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.8601469993591309 } @@ -4171,7 +3876,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-animal", + "indexUid": "vectors-animal-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.8432406187057495 } @@ -4196,13 +3901,13 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-sentiment", + "indexUid": "vectors-sentiment-[uuid]", "queriesPosition": 1, "weightedRankingScore": 
0.6690993905067444 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 8, @@ -4213,12 +3918,12 @@ async fn federation_vector_two_indexes() { // hybrid search, distinct embedder let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ - {"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "showRankingScore": true, "retrieveVectors": true}, - {"indexUid": "vectors-sentiment", "vector": [-1, 0.6], "q": "beagle", "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "showRankingScore": true, "retrieveVectors": true,}, + {"indexUid" : vectors_animal_index.uid, "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "showRankingScore": true, "retrieveVectors": true}, + {"indexUid": vectors_sentiment_index.uid, "vector": [-1, 0.6], "q": "beagle", "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "showRankingScore": true, "retrieveVectors": true,}, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###" { "hits": [ { @@ -4241,7 +3946,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-animal", + "indexUid": "vectors-animal-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9728479385375975 }, @@ -4267,7 +3972,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-animal", + "indexUid": "vectors-animal-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9701486229896544 }, @@ -4293,7 +3998,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-sentiment", + "indexUid": "vectors-sentiment-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9522157907485962 }, @@ -4319,7 +4024,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-sentiment", + "indexUid": "vectors-sentiment-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.8719604015350342 }, @@ -4345,7 +4050,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-animal", + "indexUid": "vectors-animal-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.8601469993591309 }, @@ -4371,7 +4076,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-animal", + "indexUid": "vectors-animal-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.8432406187057495 }, @@ -4397,7 +4102,7 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-sentiment", + "indexUid": "vectors-sentiment-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.8297949433326721 }, @@ -4423,14 +4128,14 @@ async fn federation_vector_two_indexes() { } }, "_federation": { - "indexUid": "vectors-sentiment", + "indexUid": "vectors-sentiment-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.18887794017791748 }, "_rankingScore": "[score]" } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 8, @@ -4441,37 +4146,17 @@ async fn federation_vector_two_indexes() { #[actix_rt::test] async fn federation_facets_different_indexes_same_facet() { - let server = Server::new().await; + let server = Server::new_shared(); + let 
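// `shared_movies_index()` and `shared_batman_index()` (first used just below) are not
// defined in this diff. A hypothetical sketch, assuming a memoized async initializer
// and an `Index` handle type like the one the other helpers return:
static MOVIES: tokio::sync::OnceCell<Index> = tokio::sync::OnceCell::const_new();

async fn shared_movies_index_sketch() -> &'static Index {
    MOVIES
        .get_or_init(|| async {
            let server = Server::new_shared();
            let index = server.unique_index_with_prefix("movies");
            let (value, _) = index.add_documents(DOCUMENTS.clone(), None).await;
            server.wait_task(value.uid()).await.succeeded();
            // A real fixture would also apply the sortable/filterable settings that
            // the hunks below delete from the individual test bodies.
            index
        })
        .await
}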
movies_index = shared_movies_index().await; + let batman_index = shared_batman_index().await; - let index = server.index("movies"); - - let documents = DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "filterableAttributes": ["title", "color"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - let index = server.index("batman"); + let batman_2_index = server.unique_index_with_prefix("batman_2"); let documents = SCORE_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + let (value, _) = batman_2_index.add_documents(documents, None).await; + server.wait_task(value.uid()).await.succeeded(); - let (value, _) = index + let (value, _) = batman_2_index .update_settings(json!({ "sortableAttributes": ["title"], "filterableAttributes": ["title"], @@ -4485,52 +4170,30 @@ async fn federation_facets_different_indexes_same_facet() { ] })) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); - let index = server.index("batman-2"); - - let documents = SCORE_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "filterableAttributes": ["title"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - // return titles ordered accross indexes + // return titles ordered across indexes let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "movies": ["title", "color"], - "batman": ["title"], - "batman-2": ["title"], + movies_index.uid.clone(): ["title", "color"], + batman_index.uid.clone(): ["title"], + batman_2_index.uid.clone(): ["title"], } }, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : movies_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : batman_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : batman_2_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "title": "Badman", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -4538,7 +4201,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Badman", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -4546,7 +4209,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman", "_federation": { - 
"indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -4554,7 +4217,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -4562,7 +4225,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman Returns", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -4570,7 +4233,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman Returns", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -4578,7 +4241,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -4586,7 +4249,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -4594,7 +4257,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -4602,7 +4265,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -4610,7 +4273,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Captain Marvel", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -4618,7 +4281,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Escape Room", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -4626,7 +4289,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Gläss", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -4634,7 +4297,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "How to Train Your Dragon: The Hidden World", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -4642,18 +4305,18 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Shazam!", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 15, "facetsByIndex": { - "batman": { + "batman-[uuid]": { "distribution": { "title": { "Badman": 1, @@ -4665,7 +4328,7 @@ async fn federation_facets_different_indexes_same_facet() { }, "stats": {} }, - "batman-2": { + "batman_2-[uuid]": { "distribution": { "title": { "Badman": 1, @@ -4677,7 +4340,7 @@ async fn federation_facets_different_indexes_same_facet() { }, "stats": {} }, - "movies": 
{ + "movies-[uuid]": { "distribution": { "color": { "blue": 3, @@ -4702,25 +4365,25 @@ async fn federation_facets_different_indexes_same_facet() { let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "movies": ["title"], - "batman": ["title"], - "batman-2": ["title"] + movies_index.uid.clone(): ["title"], + batman_index.uid.clone(): ["title"], + batman_2_index.uid.clone(): ["title"] }, "mergeFacets": {} }, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : movies_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : batman_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : batman_2_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "title": "Badman", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -4728,7 +4391,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Badman", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -4736,7 +4399,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -4744,7 +4407,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -4752,7 +4415,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman Returns", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -4760,7 +4423,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman Returns", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -4768,7 +4431,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -4776,7 +4439,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -4784,7 +4447,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -4792,7 +4455,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - 
"indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -4800,7 +4463,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Captain Marvel", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -4808,7 +4471,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Escape Room", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -4816,7 +4479,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Gläss", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -4824,7 +4487,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "How to Train Your Dragon: The Hidden World", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -4832,13 +4495,13 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Shazam!", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 15, @@ -4864,25 +4527,25 @@ async fn federation_facets_different_indexes_same_facet() { let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "movies": [], - "batman": ["title"], - "batman-2": ["title"] + movies_index.uid.clone(): [], + batman_index.uid.clone(): ["title"], + batman_2_index.uid.clone(): ["title"] } }, "queries": [ - {"indexUid" : "batman", "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman-2", "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "the bat", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : batman_index.uid.clone(), "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : batman_2_index.uid.clone(), "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : movies_index.uid.clone(), "q": "captain", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : batman_index.uid.clone(), "q": "the bat", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "title": "Captain Marvel", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 2, "weightedRankingScore": 0.9848484848484848 } @@ -4890,7 +4553,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 3, "weightedRankingScore": 0.9528218694885362 } @@ -4898,7 +4561,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - "indexUid": "batman-2", + 
"indexUid": "batman_2-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.7028218694885362 } @@ -4906,7 +4569,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 3, "weightedRankingScore": 0.9528218694885362 } @@ -4914,7 +4577,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.7028218694885362 } @@ -4922,7 +4585,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman Returns", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.8317901234567902 } @@ -4930,7 +4593,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman Returns", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.8317901234567902 } @@ -4938,7 +4601,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.23106060606060605 } @@ -4946,7 +4609,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Batman", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.23106060606060605 } @@ -4954,7 +4617,7 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Badman", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.5 } @@ -4962,18 +4625,18 @@ async fn federation_facets_different_indexes_same_facet() { { "title": "Badman", "_federation": { - "indexUid": "batman-2", + "indexUid": "batman_2-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.5 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 11, "facetsByIndex": { - "batman": { + "batman-[uuid]": { "distribution": { "title": { "Badman": 1, @@ -4985,7 +4648,7 @@ async fn federation_facets_different_indexes_same_facet() { }, "stats": {} }, - "batman-2": { + "batman_2-[uuid]": { "distribution": { "title": { "Badman": 1, @@ -4997,7 +4660,7 @@ async fn federation_facets_different_indexes_same_facet() { }, "stats": {} }, - "movies": { + "movies-[uuid]": { "distribution": {}, "stats": {} } @@ -5008,15 +4671,15 @@ async fn federation_facets_different_indexes_same_facet() { #[actix_rt::test] async fn federation_facets_same_indexes() { - let server = Server::new().await; + let server = Server::new_shared(); - let index = server.index("doggos"); + let doggos_index = server.unique_index_with_prefix("doggos"); let documents = NESTED_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + let (value, _) = doggos_index.add_documents(documents, None).await; + server.wait_task(value.uid()).await.succeeded(); - let (value, _) = index + let (value, _) = doggos_index .update_settings(json!({ "filterableAttributes": ["father", "mother", "doggos.age"], "rankingRules": [ @@ -5029,15 +4692,15 @@ async fn federation_facets_same_indexes() { ] })) .await; - 
index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); - let index = server.index("doggos-2"); + let doggos2_index = server.unique_index_with_prefix("doggos_2"); let documents = NESTED_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + let (value, _) = doggos2_index.add_documents(documents, None).await; + server.wait_task(value.uid()).await.succeeded(); - let (value, _) = index + let (value, _) = doggos2_index .update_settings(json!({ "filterableAttributes": ["father", "mother", "doggos.age"], "rankingRules": [ @@ -5050,26 +4713,26 @@ async fn federation_facets_same_indexes() { ] })) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "doggos": ["father", "mother", "doggos.age"] + doggos_index.uid.clone(): ["father", "mother", "doggos.age"] } }, "queries": [ - {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, - {"indexUid" : "doggos", "q": "michel", "attributesToRetrieve": ["id"] }, + {"indexUid" : doggos_index.uid.clone(), "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : doggos_index.uid.clone(), "q": "michel", "attributesToRetrieve": ["id"] }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "id": 852, "_federation": { - "indexUid": "doggos", + "indexUid": "doggos-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9621212121212122 } @@ -5077,7 +4740,7 @@ async fn federation_facets_same_indexes() { { "id": 951, "_federation": { - "indexUid": "doggos", + "indexUid": "doggos-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9621212121212122 } @@ -5085,18 +4748,18 @@ async fn federation_facets_same_indexes() { { "id": 750, "_federation": { - "indexUid": "doggos", + "indexUid": "doggos-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9621212121212122 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 3, "facetsByIndex": { - "doggos": { + "doggos-[uuid]": { "distribution": { "doggos.age": { "2": 1, @@ -5128,22 +4791,22 @@ async fn federation_facets_same_indexes() { let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "doggos": ["father", "mother", "doggos.age"], - "doggos-2": ["father", "mother", "doggos.age"] + doggos_index.uid.clone(): ["father", "mother", "doggos.age"], + doggos2_index.uid.clone(): ["father", "mother", "doggos.age"] } }, "queries": [ - {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, - {"indexUid" : "doggos-2", "q": "michel", "attributesToRetrieve": ["id"] }, + {"indexUid" : doggos_index.uid.clone(), "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : doggos2_index.uid.clone(), "q": "michel", "attributesToRetrieve": ["id"] }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "id": 852, "_federation": { - "indexUid": "doggos", + "indexUid": "doggos-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9621212121212122 } @@ -5151,7 +4814,7 @@ async fn 
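// A hedged note ahead of the `mergeFacets` hunk below: passing `"mergeFacets": {}`
// collapses the per-index `facetsByIndex` sections of the response into one merged
// distribution, so no uid-suffixed index names appear among the merged facet keys.
let body = json!({"federation": {
    "facetsByIndex": {
        doggos_index.uid.clone(): ["father", "mother", "doggos.age"],
        doggos2_index.uid.clone(): ["father", "mother", "doggos.age"]
    },
    "mergeFacets": {},
}, "queries": [
    {"indexUid": doggos_index.uid.clone(), "q": "je", "attributesToRetrieve": ["id"]},
]});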
federation_facets_same_indexes() { { "id": 951, "_federation": { - "indexUid": "doggos", + "indexUid": "doggos-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9621212121212122 } @@ -5159,7 +4822,7 @@ async fn federation_facets_same_indexes() { { "id": 852, "_federation": { - "indexUid": "doggos-2", + "indexUid": "doggos_2-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9621212121212122 } @@ -5167,18 +4830,18 @@ async fn federation_facets_same_indexes() { { "id": 750, "_federation": { - "indexUid": "doggos-2", + "indexUid": "doggos_2-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9621212121212122 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 4, "facetsByIndex": { - "doggos": { + "doggos-[uuid]": { "distribution": { "doggos.age": { "2": 1, @@ -5202,7 +4865,7 @@ async fn federation_facets_same_indexes() { } } }, - "doggos-2": { + "doggos_2-[uuid]": { "distribution": { "doggos.age": { "2": 1, @@ -5230,23 +4893,23 @@ async fn federation_facets_same_indexes() { let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "doggos": ["father", "mother", "doggos.age"], - "doggos-2": ["father", "mother", "doggos.age"] + doggos_index.uid.clone(): ["father", "mother", "doggos.age"], + doggos2_index.uid.clone(): ["father", "mother", "doggos.age"] }, "mergeFacets": {}, }, "queries": [ - {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, - {"indexUid" : "doggos-2", "q": "michel", "attributesToRetrieve": ["id"] }, + {"indexUid" : doggos_index.uid.clone(), "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : doggos2_index.uid.clone(), "q": "michel", "attributesToRetrieve": ["id"] }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "id": 852, "_federation": { - "indexUid": "doggos", + "indexUid": "doggos-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9621212121212122 } @@ -5254,7 +4917,7 @@ async fn federation_facets_same_indexes() { { "id": 951, "_federation": { - "indexUid": "doggos", + "indexUid": "doggos-[uuid]", "queriesPosition": 0, "weightedRankingScore": 0.9621212121212122 } @@ -5262,7 +4925,7 @@ async fn federation_facets_same_indexes() { { "id": 852, "_federation": { - "indexUid": "doggos-2", + "indexUid": "doggos_2-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9621212121212122 } @@ -5270,13 +4933,13 @@ async fn federation_facets_same_indexes() { { "id": 750, "_federation": { - "indexUid": "doggos-2", + "indexUid": "doggos_2-[uuid]", "queriesPosition": 1, "weightedRankingScore": 0.9621212121212122 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 4, @@ -5309,37 +4972,17 @@ async fn federation_facets_same_indexes() { #[actix_rt::test] async fn federation_inconsistent_merge_order() { - let server = Server::new().await; + let server = Server::new_shared(); - let index = server.index("movies"); + let movies_index = shared_movies_index().await; + + let movies2_index = server.unique_index_with_prefix("movies_2"); let documents = DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); + let (value, _) = movies2_index.add_documents(documents, None).await; + server.wait_task(value.uid()).await.succeeded(); - let 
(value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "filterableAttributes": ["title", "color"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); - - let index = server.index("movies-2"); - - let documents = DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index + let (value, _) = movies2_index .update_settings(json!({ "sortableAttributes": ["title"], "filterableAttributes": ["title", "color"], @@ -5356,52 +4999,32 @@ async fn federation_inconsistent_merge_order() { } })) .await; - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); - let index = server.index("batman"); - - let documents = SCORE_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - - let (value, _) = index - .update_settings(json!({ - "sortableAttributes": ["title"], - "filterableAttributes": ["title"], - "rankingRules": [ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness" - ] - })) - .await; - index.wait_task(value.uid()).await.succeeded(); + let batman_index = shared_batman_index().await; // without merging, it works let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "movies": ["title", "color"], - "batman": ["title"], - "movies-2": ["title", "color"], + movies_index.uid.clone(): ["title", "color"], + batman_index.uid.clone(): ["title"], + movies2_index.uid.clone(): ["title", "color"], } }, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : movies_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : batman_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : movies2_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "title": "Badman", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -5409,7 +5032,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Batman", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -5417,7 +5040,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Batman Returns", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -5425,7 +5048,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -5433,7 +5056,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - 
"indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -5441,7 +5064,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Captain Marvel", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -5449,7 +5072,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Captain Marvel", "_federation": { - "indexUid": "movies-2", + "indexUid": "movies_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -5457,7 +5080,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Escape Room", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -5465,7 +5088,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Escape Room", "_federation": { - "indexUid": "movies-2", + "indexUid": "movies_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -5473,7 +5096,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Gläss", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -5481,7 +5104,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Gläss", "_federation": { - "indexUid": "movies-2", + "indexUid": "movies_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -5489,7 +5112,7 @@ async fn federation_inconsistent_merge_order() { { "title": "How to Train Your Dragon: The Hidden World", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -5497,7 +5120,7 @@ async fn federation_inconsistent_merge_order() { { "title": "How to Train Your Dragon: The Hidden World", "_federation": { - "indexUid": "movies-2", + "indexUid": "movies_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -5505,7 +5128,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Shazam!", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -5513,18 +5136,18 @@ async fn federation_inconsistent_merge_order() { { "title": "Shazam!", "_federation": { - "indexUid": "movies-2", + "indexUid": "movies_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 15, "facetsByIndex": { - "batman": { + "batman-[uuid]": { "distribution": { "title": { "Badman": 1, @@ -5536,7 +5159,7 @@ async fn federation_inconsistent_merge_order() { }, "stats": {} }, - "movies": { + "movies-[uuid]": { "distribution": { "color": { "blue": 3, @@ -5554,7 +5177,7 @@ async fn federation_inconsistent_merge_order() { }, "stats": {} }, - "movies-2": { + "movies_2-[uuid]": { "distribution": { "color": { "red": 3, @@ -5580,21 +5203,21 @@ async fn federation_inconsistent_merge_order() { let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "movies": ["title", "color"], - "batman": ["title"], - "movies-2": ["title", "color"], + movies_index.uid.clone(): ["title", "color"], + batman_index.uid.clone(): ["title"], + movies2_index.uid.clone(): ["title", "color"], }, "mergeFacets": {} }, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - 
{"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : movies_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : batman_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : movies2_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, ]})) .await; snapshot!(code, @"400 Bad Request"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response), @r###" { - "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.\n - Note: index `movies-2` used in `.queries[2]`", + "message": "Inside `.federation.facetsByIndex.movies_2-[uuid]`: Inconsistent order for values in facet `color`: index `movies-[uuid]` orders alphabetically, but index `movies_2-[uuid]` orders by count.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.\n - Note: index `movies_2-[uuid]` used in `.queries[2]`", "code": "invalid_multi_search_facet_order", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" @@ -5605,27 +5228,27 @@ async fn federation_inconsistent_merge_order() { let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { - "movies": ["title", "color"], - "batman": ["title"], - "movies-2": ["title"], + movies_index.uid.clone(): ["title", "color"], + batman_index.uid.clone(): ["title"], + movies2_index.uid.clone(): ["title"], }, "mergeFacets": { "maxValuesPerFacet": 3, } }, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : movies_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : batman_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : movies2_index.uid.clone(), "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { "title": "Badman", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -5633,7 +5256,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Batman", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -5641,7 +5264,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Batman Returns", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -5649,7 +5272,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Batman the dark knight returns: Part 1", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, 
"weightedRankingScore": 1.0 } @@ -5657,7 +5280,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Batman the dark knight returns: Part 2", "_federation": { - "indexUid": "batman", + "indexUid": "batman-[uuid]", "queriesPosition": 1, "weightedRankingScore": 1.0 } @@ -5665,7 +5288,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Captain Marvel", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -5673,7 +5296,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Captain Marvel", "_federation": { - "indexUid": "movies-2", + "indexUid": "movies_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -5681,7 +5304,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Escape Room", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -5689,7 +5312,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Escape Room", "_federation": { - "indexUid": "movies-2", + "indexUid": "movies_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -5697,7 +5320,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Gläss", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -5705,7 +5328,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Gläss", "_federation": { - "indexUid": "movies-2", + "indexUid": "movies_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -5713,7 +5336,7 @@ async fn federation_inconsistent_merge_order() { { "title": "How to Train Your Dragon: The Hidden World", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -5721,7 +5344,7 @@ async fn federation_inconsistent_merge_order() { { "title": "How to Train Your Dragon: The Hidden World", "_federation": { - "indexUid": "movies-2", + "indexUid": "movies_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } @@ -5729,7 +5352,7 @@ async fn federation_inconsistent_merge_order() { { "title": "Shazam!", "_federation": { - "indexUid": "movies", + "indexUid": "movies-[uuid]", "queriesPosition": 0, "weightedRankingScore": 1.0 } @@ -5737,13 +5360,13 @@ async fn federation_inconsistent_merge_order() { { "title": "Shazam!", "_federation": { - "indexUid": "movies-2", + "indexUid": "movies_2-[uuid]", "queriesPosition": 2, "weightedRankingScore": 1.0 } } ], - "processingTimeMs": "[time]", + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 15, diff --git a/crates/meilisearch/tests/search/multi/proxy.rs b/crates/meilisearch/tests/search/multi/proxy.rs index 55736d058..c537f5ae8 100644 --- a/crates/meilisearch/tests/search/multi/proxy.rs +++ b/crates/meilisearch/tests/search/multi/proxy.rs @@ -1224,6 +1224,7 @@ async fn error_bad_request_facets_by_index_facet() { } #[actix_rt::test] +#[ignore] async fn error_remote_does_not_answer() { let ms0 = Server::new().await; let ms1 = Server::new().await; diff --git a/crates/meilisearch/tests/search/pagination.rs b/crates/meilisearch/tests/search/pagination.rs index f8b698a95..c0752e7ec 100644 --- a/crates/meilisearch/tests/search/pagination.rs +++ b/crates/meilisearch/tests/search/pagination.rs @@ -114,14 +114,14 @@ async fn ensure_placeholder_search_hit_count_valid() { } ]); let (task, _code) = index.add_documents(documents, None).await; - 
index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, _code) = index .update_settings( json!({ "rankingRules": ["distinct:asc"], "distinctAttribute": "distinct"}), ) .await; - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); for page in 0..=4 { index diff --git a/crates/meilisearch/tests/search/restrict_searchable.rs b/crates/meilisearch/tests/search/restrict_searchable.rs index e5408a210..bbd2a4ee3 100644 --- a/crates/meilisearch/tests/search/restrict_searchable.rs +++ b/crates/meilisearch/tests/search/restrict_searchable.rs @@ -9,7 +9,7 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) let index = server.unique_index(); let (task, _code) = index.add_documents(documents.clone(), None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); index } @@ -65,7 +65,7 @@ async fn search_no_searchable_attribute_set() { .await; let (task, _status_code) = index.update_settings_searchable_attributes(json!(["*"])).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); index .search( @@ -78,7 +78,7 @@ async fn search_no_searchable_attribute_set() { .await; let (task, _status_code) = index.update_settings_searchable_attributes(json!(["*"])).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); index .search( @@ -109,7 +109,7 @@ async fn search_on_all_attributes_restricted_set() { let server = Server::new_shared(); let index = index_with_documents(server, &SIMPLE_SEARCH_DOCUMENTS).await; let (task, _status_code) = index.update_settings_searchable_attributes(json!(["title"])).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["*"]}), |response, code| { @@ -194,7 +194,7 @@ async fn word_ranking_rule_order_exact_words() { let (task, _status_code) = index .update_settings_typo_tolerance(json!({"disableOnWords": ["Captain", "Marvel"]})) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); // simple search should return 2 documents (ids: 2 and 3). index @@ -360,7 +360,7 @@ async fn search_on_exact_field() { let (response, code) = index.update_settings_typo_tolerance(json!({ "disableOnAttributes": ["exact"] })).await; assert_eq!(202, code, "{response:?}"); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); // Searching on an exact attribute should only return the document matching without typo. 
index .search(json!({"q": "Marvel", "attributesToSearchOn": ["exact"]}), |response, code| { @@ -557,7 +557,7 @@ async fn nested_search_on_title_restricted_set_with_suffix_wildcard() { let index = index_with_documents(server, &NESTED_SEARCH_DOCUMENTS).await; let (task, _status_code) = index.update_settings_searchable_attributes(json!(["details.title"])).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); index .search( @@ -595,7 +595,7 @@ async fn nested_search_no_searchable_attribute_set_with_any_wildcard() { .await; let (task, _status_code) = index.update_settings_searchable_attributes(json!(["*"])).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); index .search( @@ -608,7 +608,7 @@ async fn nested_search_no_searchable_attribute_set_with_any_wildcard() { .await; let (task, _status_code) = index.update_settings_searchable_attributes(json!(["*"])).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); index .search( diff --git a/crates/meilisearch/tests/settings/distinct.rs b/crates/meilisearch/tests/settings/distinct.rs index a3b1b5276..a704ab3da 100644 --- a/crates/meilisearch/tests/settings/distinct.rs +++ b/crates/meilisearch/tests/settings/distinct.rs @@ -7,7 +7,7 @@ async fn set_and_reset_distinct_attribute() { let index = server.unique_index(); let (task1, _code) = index.update_settings(json!({ "distinctAttribute": "test"})).await; - index.wait_task(task1.uid()).await.succeeded(); + server.wait_task(task1.uid()).await.succeeded(); let (response, _) = index.settings().await; @@ -15,7 +15,7 @@ async fn set_and_reset_distinct_attribute() { let (task2, _status_code) = index.update_settings(json!({ "distinctAttribute": null })).await; - index.wait_task(task2.uid()).await.succeeded(); + server.wait_task(task2.uid()).await.succeeded(); let (response, _) = index.settings().await; @@ -28,7 +28,7 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() { let index = server.unique_index(); let (update_task1, _code) = index.update_distinct_attribute(json!("test")).await; - index.wait_task(update_task1.uid()).await.succeeded(); + server.wait_task(update_task1.uid()).await.succeeded(); let (response, _) = index.get_distinct_attribute().await; @@ -36,7 +36,7 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() { let (update_task2, _status_code) = index.update_distinct_attribute(json!(null)).await; - index.wait_task(update_task2.uid()).await.succeeded(); + server.wait_task(update_task2.uid()).await.succeeded(); let (response, _) = index.get_distinct_attribute().await; diff --git a/crates/meilisearch/tests/settings/errors.rs b/crates/meilisearch/tests/settings/errors.rs index 4220cdbf8..6654a95a4 100644 --- a/crates/meilisearch/tests/settings/errors.rs +++ b/crates/meilisearch/tests/settings/errors.rs @@ -338,6 +338,47 @@ async fn settings_bad_pagination() { "###); } +#[actix_rt::test] +async fn settings_bad_max_total_hits() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let (response, code) = + index.update_settings(json!({ "pagination": { "maxTotalHits": "doggo" } })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value type at `.pagination.maxTotalHits`: expected a positive integer, but found a string: `\"doggo\"`", + "code": "invalid_settings_pagination", + "type": "invalid_request", + "link": 
"https://docs.meilisearch.com/errors#invalid_settings_pagination" + } + "###); + + let (response, code) = + index.update_settings_pagination(json!({ "maxTotalHits": "doggo" } )).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r#" + { + "message": "Invalid value type at `.maxTotalHits`: expected a positive integer, but found a string: `\"doggo\"`", + "code": "invalid_settings_pagination", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_pagination" + } + "#); + + let (response, code) = index.update_settings_pagination(json!({ "maxTotalHits": 0 } )).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r#" + { + "message": "Invalid value at `.maxTotalHits`: a non-zero integer value lower than `18446744073709551615` was expected, but found a zero", + "code": "invalid_settings_pagination", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_pagination" + } + "#); +} + #[actix_rt::test] async fn settings_bad_search_cutoff_ms() { let server = Server::new_shared(); diff --git a/crates/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs index 2dc2b175f..47e699380 100644 --- a/crates/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -58,7 +58,7 @@ macro_rules! test_setting_routes { let index = server.unique_index(); let (response, code) = index.create(None).await; assert_eq!(code, 202, "{response}"); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let url = format!("/indexes/{}/settings/{}", index.uid, stringify!($setting) @@ -184,6 +184,16 @@ test_setting_routes!( update_verb: patch, default_value: {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [], "disableOnNumbers": false} }, + { + setting: chat, + update_verb: put, + default_value: { + "description": "", + "documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + "documentTemplateMaxBytes": 400, + "searchParameters": {} + } + }, ); #[actix_rt::test] @@ -199,7 +209,7 @@ async fn get_settings() { let server = Server::new_shared(); let index = server.unique_index(); let (response, _code) = index.create(None).await; - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200); let settings = response.as_object().unwrap(); @@ -237,6 +247,20 @@ async fn get_settings() { assert_eq!(settings["prefixSearch"], json!("indexingTime")); assert_eq!(settings["facetSearch"], json!(true)); assert_eq!(settings["embedders"], json!({})); + assert_eq!(settings["synonyms"], json!({})); + assert_eq!( + settings["typoTolerance"], + json!({ + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [], + "disableOnNumbers": false + }) + ); } #[actix_rt::test] @@ -244,7 +268,7 @@ async fn secrets_are_hidden_in_settings() { let server = Server::new_shared(); let index = server.unique_index(); let (response, _code) = index.create(None).await; - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -275,7 +299,7 @@ 
async fn secrets_are_hidden_in_settings() { let settings_update_uid = response.uid(); - index.wait_task(settings_update_uid).await.succeeded(); + server.wait_task(settings_update_uid).await.succeeded(); let (response, code) = index.settings().await; meili_snap::snapshot!(code, @"200 OK"); @@ -374,14 +398,14 @@ async fn test_partial_update() { let server = Server::new_shared(); let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"displayedAttributes": ["foo"]})).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200); assert_eq!(response["displayedAttributes"], json!(["foo"])); assert_eq!(response["searchableAttributes"], json!(["*"])); let (task, _) = index.update_settings(json!({"searchableAttributes": ["bar"]})).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200); @@ -396,7 +420,7 @@ async fn error_delete_settings_unexisting_index() { let (task, code) = index.delete_settings().await; assert_eq!(code, 202); - index.wait_task(task.uid()).await.failed(); + server.wait_task(task.uid()).await.failed(); } #[actix_rt::test] @@ -414,12 +438,19 @@ async fn reset_all_settings() { let (response, code) = index.add_documents(documents, None).await; assert_eq!(code, 202); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); - let (update_task,_status_code) = index - .update_settings(json!({"displayedAttributes": ["name", "age"], "searchableAttributes": ["name"], "stopWords": ["the"], "filterableAttributes": ["age"], "synonyms": {"puppy": ["dog", "doggo", "potat"] }})) + let (update_task, _status_code) = index + .update_settings(json!({ + "displayedAttributes": ["name", "age"], + "searchableAttributes": ["name"], + "stopWords": ["the"], + "filterableAttributes": ["age"], + "synonyms": {"puppy": ["dog", "doggo", "potat"] }, + "typoTolerance": {"disableOnNumbers": true} + })) .await; - index.wait_task(update_task.uid()).await.succeeded(); + server.wait_task(update_task.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200); assert_eq!(response["displayedAttributes"], json!(["name", "age"])); @@ -427,9 +458,22 @@ async fn reset_all_settings() { assert_eq!(response["stopWords"], json!(["the"])); assert_eq!(response["synonyms"], json!({"puppy": ["dog", "doggo", "potat"] })); assert_eq!(response["filterableAttributes"], json!(["age"])); + assert_eq!( + response["typoTolerance"], + json!({ + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [], + "disableOnNumbers": true + }) + ); let (delete_task, _status_code) = index.delete_settings().await; - index.wait_task(delete_task.uid()).await.succeeded(); + server.wait_task(delete_task.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200); @@ -438,6 +482,19 @@ async fn reset_all_settings() { assert_eq!(response["stopWords"], json!([])); assert_eq!(response["filterableAttributes"], json!([])); assert_eq!(response["synonyms"], json!({})); + assert_eq!( + response["typoTolerance"], + json!({ + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [], + "disableOnNumbers": false + }) + ); let (response, code) = 
index.get_document(1, None).await; assert_eq!(code, 200); @@ -450,11 +507,11 @@ async fn update_setting_unexisting_index() { let index = server.unique_index(); let (task, code) = index.update_settings(json!({})).await; assert_eq!(code, 202); - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (_response, code) = index.get().await; assert_eq!(code, 200); let (task, _status_code) = index.delete_settings().await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); } #[actix_rt::test] @@ -497,7 +554,7 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() { let index = server.unique_index(); let (task, _code) = index.update_distinct_attribute(json!("test")).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, _) = index.get_distinct_attribute().await; @@ -505,7 +562,7 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() { let (task, _status_code) = index.update_distinct_attribute(json!(null)).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, _) = index.get_distinct_attribute().await; @@ -530,7 +587,7 @@ async fn granular_filterable_attributes() { { "attributePatterns": ["default-facet-search"], "features": { "filter": {"equality": true, "comparison": true} } }, ] })).await; assert_eq!(code, 202); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200, "{response}"); diff --git a/crates/meilisearch/tests/settings/prefix_search_settings.rs b/crates/meilisearch/tests/settings/prefix_search_settings.rs index 5da758a7d..81e1f40fc 100644 --- a/crates/meilisearch/tests/settings/prefix_search_settings.rs +++ b/crates/meilisearch/tests/settings/prefix_search_settings.rs @@ -26,11 +26,11 @@ static DOCUMENTS: Lazy = Lazy::new(|| { #[actix_rt::test] async fn add_docs_and_disable() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("test"); let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -38,8 +38,8 @@ async fn add_docs_and_disable() { "rankingRules": ["words", "typo", "proximity"], })) .await; - assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(response.uid()).await; + assert_eq!("202", code.as_str(), "{response:?}"); + server.wait_task(response.uid()).await.succeeded(); // only 1 document should match index @@ -86,8 +86,8 @@ async fn add_docs_and_disable() { #[actix_rt::test] async fn disable_and_add_docs() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("test"); let (response, code) = index .update_settings(json!({ @@ -95,11 +95,11 @@ async fn disable_and_add_docs() { "rankingRules": ["words", "typo", "proximity"], })) .await; - assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(response.uid()).await; + assert_eq!("202", code.as_str(), "{response:?}"); + server.wait_task(response.uid()).await.succeeded(); let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await; - 
index.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); // only 1 document should match index @@ -145,8 +145,8 @@ async fn disable_and_add_docs() { #[actix_rt::test] async fn disable_add_docs_and_enable() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("test"); let (response, code) = index .update_settings(json!({ @@ -154,11 +154,11 @@ async fn disable_add_docs_and_enable() { "rankingRules": ["words", "typo", "proximity"], })) .await; - assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(response.uid()).await; + assert_eq!("202", code.as_str(), "{response:?}"); + server.wait_task(response.uid()).await.succeeded(); let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -166,8 +166,8 @@ async fn disable_add_docs_and_enable() { "rankingRules": ["words", "typo", "proximity"], })) .await; - assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(2).await; + assert_eq!("202", code.as_str(), "{response:?}"); + server.wait_task(response.uid()).await.succeeded(); // all documents should match index @@ -253,8 +253,8 @@ async fn disable_add_docs_and_enable() { #[actix_rt::test] async fn disable_add_docs_and_reset() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("test"); let (response, code) = index .update_settings(json!({ @@ -262,11 +262,11 @@ async fn disable_add_docs_and_reset() { "rankingRules": ["words", "typo", "proximity"], })) .await; - assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(response.uid()).await; + assert_eq!("202", code.as_str(), "{response:?}"); + server.wait_task(response.uid()).await.succeeded(); let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -274,8 +274,8 @@ async fn disable_add_docs_and_reset() { "rankingRules": ["words", "typo", "proximity"], })) .await; - assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(2).await; + assert_eq!("202", code.as_str(), "{response:?}"); + server.wait_task(response.uid()).await.succeeded(); // all documents should match index @@ -361,19 +361,19 @@ async fn disable_add_docs_and_reset() { #[actix_rt::test] async fn default_behavior() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("test"); let (response, code) = index .update_settings(json!({ "rankingRules": ["words", "typo", "proximity"], })) .await; - assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(response.uid()).await; + assert_eq!("202", code.as_str(), "{response:?}"); + server.wait_task(response.uid()).await.succeeded(); let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); // all documents should match index diff --git a/crates/meilisearch/tests/settings/proximity_settings.rs b/crates/meilisearch/tests/settings/proximity_settings.rs index 6de1ffe0e..555c13b58 100644 --- 
a/crates/meilisearch/tests/settings/proximity_settings.rs +++ b/crates/meilisearch/tests/settings/proximity_settings.rs @@ -30,7 +30,7 @@ async fn attribute_scale_search() { let index = server.unique_index(); let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -39,7 +39,7 @@ async fn attribute_scale_search() { })) .await; assert_eq!("202", code.as_str(), "{response:?}"); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); // the expected order is [1, 3, 2] instead of [3, 1, 2] // because the attribute scale doesn't make the difference between 1 and 3. @@ -103,7 +103,7 @@ async fn attribute_scale_phrase_search() { let index = server.unique_index(); let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (task, _code) = index .update_settings(json!({ @@ -111,7 +111,7 @@ async fn attribute_scale_phrase_search() { "rankingRules": ["words", "typo", "proximity"], })) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); // the expected order is [1, 3] instead of [3, 1] // because the attribute scale doesn't make the difference between 1 and 3. @@ -171,7 +171,7 @@ async fn word_scale_set_and_reset() { let index = server.unique_index(); let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); // Set and reset the setting ensuring the swap between the 2 settings is applied. let (update_task1, _code) = index @@ -180,7 +180,7 @@ async fn word_scale_set_and_reset() { "rankingRules": ["words", "typo", "proximity"], })) .await; - index.wait_task(update_task1.uid()).await.succeeded(); + server.wait_task(update_task1.uid()).await.succeeded(); let (update_task2, _code) = index .update_settings(json!({ @@ -188,7 +188,7 @@ async fn word_scale_set_and_reset() { "rankingRules": ["words", "typo", "proximity"], })) .await; - index.wait_task(update_task2.uid()).await.succeeded(); + server.wait_task(update_task2.uid()).await.succeeded(); // [3, 1, 2] index @@ -286,7 +286,7 @@ async fn attribute_scale_default_ranking_rules() { let index = server.unique_index(); let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -294,7 +294,7 @@ async fn attribute_scale_default_ranking_rules() { })) .await; assert_eq!("202", code.as_str(), "{response:?}"); - index.wait_task(response.uid()).await.succeeded(); + server.wait_task(response.uid()).await.succeeded(); // the expected order is [3, 1, 2] index diff --git a/crates/meilisearch/tests/settings/tokenizer_customization.rs b/crates/meilisearch/tests/settings/tokenizer_customization.rs index 7c58368f7..a0631418f 100644 --- a/crates/meilisearch/tests/settings/tokenizer_customization.rs +++ b/crates/meilisearch/tests/settings/tokenizer_customization.rs @@ -15,7 +15,7 @@ async fn set_and_reset() { "dictionary": ["J.R.R.", "J. R. 
R."], })) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, _) = index.settings().await; snapshot!(json_string!(response["nonSeparatorTokens"]), @r###" @@ -45,7 +45,7 @@ async fn set_and_reset() { })) .await; - index.wait_task(task.uid()).await.succeeded(); + server.wait_task(task.uid()).await.succeeded(); let (response, _) = index.settings().await; snapshot!(json_string!(response["nonSeparatorTokens"]), @"[]"); @@ -74,7 +74,7 @@ async fn set_and_search() { let index = server.unique_index(); let (add_task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(add_task.uid()).await.succeeded(); + server.wait_task(add_task.uid()).await.succeeded(); let (update_task, _code) = index .update_settings(json!({ @@ -83,7 +83,7 @@ async fn set_and_search() { "dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"], })) .await; - index.wait_task(update_task.uid()).await.succeeded(); + server.wait_task(update_task.uid()).await.succeeded(); index .search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| { @@ -228,7 +228,7 @@ async fn advanced_synergies() { let index = server.unique_index(); let (add_task, _status_code) = index.add_documents(documents, None).await; - index.wait_task(add_task.uid()).await.succeeded(); + server.wait_task(add_task.uid()).await.succeeded(); let (update_task, _code) = index .update_settings(json!({ @@ -243,7 +243,7 @@ async fn advanced_synergies() { } })) .await; - index.wait_task(update_task.uid()).await.succeeded(); + server.wait_task(update_task.uid()).await.succeeded(); index .search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| { @@ -353,7 +353,7 @@ async fn advanced_synergies() { "dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. 
K."], })) .await; - index.wait_task(_response.uid()).await.succeeded(); + server.wait_task(_response.uid()).await.succeeded(); index .search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| { diff --git a/crates/meilisearch/tests/similar/mod.rs b/crates/meilisearch/tests/similar/mod.rs index defb777e0..fdfcc1665 100644 --- a/crates/meilisearch/tests/similar/mod.rs +++ b/crates/meilisearch/tests/similar/mod.rs @@ -47,8 +47,8 @@ static DOCUMENTS: Lazy = Lazy::new(|| { #[actix_rt::test] async fn basic() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("test"); let (response, code) = index .update_settings(json!({ @@ -61,12 +61,12 @@ async fn basic() { "filterableAttributes": ["title"]})) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); index .similar( @@ -233,8 +233,8 @@ async fn basic() { #[actix_rt::test] async fn ranking_score_threshold() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("test"); let (response, code) = index .update_settings(json!({ @@ -247,12 +247,12 @@ async fn ranking_score_threshold() { "filterableAttributes": ["title"]})) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); index .similar( @@ -503,8 +503,8 @@ async fn ranking_score_threshold() { #[actix_rt::test] async fn filter() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("test"); let (response, code) = index .update_settings(json!({ @@ -517,12 +517,12 @@ async fn filter() { "filterableAttributes": ["title", "release_year"]})) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await.succeeded(); + server.wait_task(value.uid()).await.succeeded(); index .similar( @@ -621,8 +621,8 @@ async fn filter() { #[actix_rt::test] async fn limit_and_offset() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index_with_prefix("test"); let (response, code) = index .update_settings(json!({ @@ -635,12 +635,12 @@ async fn limit_and_offset() { "filterableAttributes": ["title"]})) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await.succeeded(); + 
server.wait_task(value.uid()).await.succeeded(); index .similar( diff --git a/crates/meilisearch/tests/tasks/errors.rs b/crates/meilisearch/tests/tasks/errors.rs index 759531d42..9970bafa4 100644 --- a/crates/meilisearch/tests/tasks/errors.rs +++ b/crates/meilisearch/tests/tasks/errors.rs @@ -97,7 +97,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r#" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `upgradeDatabase`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" @@ -108,7 +108,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r#" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `upgradeDatabase`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" @@ -119,7 +119,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r#" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `upgradeDatabase`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. 
Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" diff --git a/crates/meilisearch/tests/upgrade/mod.rs b/crates/meilisearch/tests/upgrade/mod.rs index f1e45164e..8114ed58b 100644 --- a/crates/meilisearch/tests/upgrade/mod.rs +++ b/crates/meilisearch/tests/upgrade/mod.rs @@ -43,7 +43,7 @@ async fn version_too_old() { std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap(); let options = Opt { experimental_dumpless_upgrade: true, ..default_settings }; let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err(); - snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.15.0"); + snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.16.0"); } #[actix_rt::test] @@ -58,7 +58,7 @@ async fn version_requires_downgrade() { std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap(); let options = Opt { experimental_dumpless_upgrade: true, ..default_settings }; let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err(); - snapshot!(err, @"Database version 1.15.1 is higher than the Meilisearch version 1.15.0. Downgrade is not supported"); + snapshot!(err, @"Database version 1.16.1 is higher than the Meilisearch version 1.16.0. Downgrade is not supported"); } #[actix_rt::test] diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap index 1d89e6838..f4edae51b 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap @@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "progress": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.15.0" + "upgradeTo": "v1.16.0" }, "stats": { "totalNbTasks": 1, @@ -24,7 +24,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "a batch of tasks of type `upgradeDatabase` cannot be batched with any other type of task" + "batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type." 
}, { "uid": 23, @@ -47,7 +47,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.004146631S", "startedAt": "2025-01-23T11:38:57.012591321Z", "finishedAt": "2025-01-23T11:38:57.016737952Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 22, @@ -71,7 +71,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.102738497S", "startedAt": "2025-01-23T11:36:22.551906856Z", "finishedAt": "2025-01-23T11:36:22.654645353Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 21, @@ -95,7 +95,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.005108474S", "startedAt": "2025-01-23T11:36:04.132670526Z", "finishedAt": "2025-01-23T11:36:04.137779Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 20, @@ -119,7 +119,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.027954894S", "startedAt": "2025-01-23T11:35:53.631082795Z", "finishedAt": "2025-01-23T11:35:53.659037689Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 19, @@ -142,7 +142,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.006903297S", "startedAt": "2025-01-20T11:50:52.874106134Z", "finishedAt": "2025-01-20T11:50:52.881009431Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 18, @@ -171,7 +171,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000481257S", "startedAt": "2025-01-20T11:48:04.92820416Z", "finishedAt": "2025-01-20T11:48:04.928685417Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 17, @@ -194,7 +194,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000407005S", "startedAt": "2025-01-20T11:47:53.509403957Z", "finishedAt": "2025-01-20T11:47:53.509810962Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 16, @@ -217,7 +217,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000403716S", "startedAt": "2025-01-20T11:47:48.430653005Z", "finishedAt": "2025-01-20T11:47:48.431056721Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 15, @@ -240,7 +240,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000417016S", "startedAt": "2025-01-20T11:47:42.429678617Z", "finishedAt": "2025-01-20T11:47:42.430095633Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 14, @@ -264,7 +264,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT12.086284842S", "startedAt": "2025-01-20T11:47:03.092181576Z", "finishedAt": "2025-01-20T11:47:15.178466418Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 13, @@ -296,7 +296,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.011506614S", "startedAt": "2025-01-16T17:18:43.29334923Z", "finishedAt": "2025-01-16T17:18:43.304855844Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 12, @@ -324,7 +324,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007640163S", "startedAt": "2025-01-16T17:02:52.539749853Z", "finishedAt": "2025-01-16T17:02:52.547390016Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 11, @@ -347,7 +347,7 @@ source: 
crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007307840S", "startedAt": "2025-01-16T17:01:14.112756687Z", "finishedAt": "2025-01-16T17:01:14.120064527Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 10, @@ -375,7 +375,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007391353S", "startedAt": "2025-01-16T17:00:29.201180268Z", "finishedAt": "2025-01-16T17:00:29.208571621Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 9, @@ -403,7 +403,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007445825S", "startedAt": "2025-01-16T17:00:15.77629445Z", "finishedAt": "2025-01-16T17:00:15.783740275Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 8, @@ -436,7 +436,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.012020083S", "startedAt": "2025-01-16T16:59:42.744086671Z", "finishedAt": "2025-01-16T16:59:42.756106754Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 7, @@ -463,7 +463,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007440092S", "startedAt": "2025-01-16T16:58:41.2155771Z", "finishedAt": "2025-01-16T16:58:41.223017192Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 6, @@ -490,7 +490,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007565161S", "startedAt": "2025-01-16T16:54:51.940332781Z", "finishedAt": "2025-01-16T16:54:51.947897942Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 5, @@ -516,7 +516,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.016307263S", "startedAt": "2025-01-16T16:53:19.913351957Z", "finishedAt": "2025-01-16T16:53:19.92965922Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 23, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap index 1d89e6838..f4edae51b 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap @@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "progress": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.15.0" + "upgradeTo": "v1.16.0" }, "stats": { "totalNbTasks": 1, @@ -24,7 +24,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "a batch of tasks of type `upgradeDatabase` cannot be batched with any other type of task" + "batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type." 
}, { "uid": 23, @@ -47,7 +47,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.004146631S", "startedAt": "2025-01-23T11:38:57.012591321Z", "finishedAt": "2025-01-23T11:38:57.016737952Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 22, @@ -71,7 +71,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.102738497S", "startedAt": "2025-01-23T11:36:22.551906856Z", "finishedAt": "2025-01-23T11:36:22.654645353Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 21, @@ -95,7 +95,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.005108474S", "startedAt": "2025-01-23T11:36:04.132670526Z", "finishedAt": "2025-01-23T11:36:04.137779Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 20, @@ -119,7 +119,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.027954894S", "startedAt": "2025-01-23T11:35:53.631082795Z", "finishedAt": "2025-01-23T11:35:53.659037689Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 19, @@ -142,7 +142,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.006903297S", "startedAt": "2025-01-20T11:50:52.874106134Z", "finishedAt": "2025-01-20T11:50:52.881009431Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 18, @@ -171,7 +171,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000481257S", "startedAt": "2025-01-20T11:48:04.92820416Z", "finishedAt": "2025-01-20T11:48:04.928685417Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 17, @@ -194,7 +194,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000407005S", "startedAt": "2025-01-20T11:47:53.509403957Z", "finishedAt": "2025-01-20T11:47:53.509810962Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 16, @@ -217,7 +217,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000403716S", "startedAt": "2025-01-20T11:47:48.430653005Z", "finishedAt": "2025-01-20T11:47:48.431056721Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 15, @@ -240,7 +240,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000417016S", "startedAt": "2025-01-20T11:47:42.429678617Z", "finishedAt": "2025-01-20T11:47:42.430095633Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 14, @@ -264,7 +264,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT12.086284842S", "startedAt": "2025-01-20T11:47:03.092181576Z", "finishedAt": "2025-01-20T11:47:15.178466418Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 13, @@ -296,7 +296,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.011506614S", "startedAt": "2025-01-16T17:18:43.29334923Z", "finishedAt": "2025-01-16T17:18:43.304855844Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 12, @@ -324,7 +324,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007640163S", "startedAt": "2025-01-16T17:02:52.539749853Z", "finishedAt": "2025-01-16T17:02:52.547390016Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 11, @@ -347,7 +347,7 @@ source: 
crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007307840S", "startedAt": "2025-01-16T17:01:14.112756687Z", "finishedAt": "2025-01-16T17:01:14.120064527Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 10, @@ -375,7 +375,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007391353S", "startedAt": "2025-01-16T17:00:29.201180268Z", "finishedAt": "2025-01-16T17:00:29.208571621Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 9, @@ -403,7 +403,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007445825S", "startedAt": "2025-01-16T17:00:15.77629445Z", "finishedAt": "2025-01-16T17:00:15.783740275Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 8, @@ -436,7 +436,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.012020083S", "startedAt": "2025-01-16T16:59:42.744086671Z", "finishedAt": "2025-01-16T16:59:42.756106754Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 7, @@ -463,7 +463,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007440092S", "startedAt": "2025-01-16T16:58:41.2155771Z", "finishedAt": "2025-01-16T16:58:41.223017192Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 6, @@ -490,7 +490,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007565161S", "startedAt": "2025-01-16T16:54:51.940332781Z", "finishedAt": "2025-01-16T16:54:51.947897942Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 5, @@ -516,7 +516,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.016307263S", "startedAt": "2025-01-16T16:53:19.913351957Z", "finishedAt": "2025-01-16T16:53:19.92965922Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 23, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap index 1d89e6838..f4edae51b 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap @@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "progress": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.15.0" + "upgradeTo": "v1.16.0" }, "stats": { "totalNbTasks": 1, @@ -24,7 +24,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "a batch of tasks of type `upgradeDatabase` cannot be batched with any other type of task" + "batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type." 
}, { "uid": 23, @@ -47,7 +47,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.004146631S", "startedAt": "2025-01-23T11:38:57.012591321Z", "finishedAt": "2025-01-23T11:38:57.016737952Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 22, @@ -71,7 +71,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.102738497S", "startedAt": "2025-01-23T11:36:22.551906856Z", "finishedAt": "2025-01-23T11:36:22.654645353Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 21, @@ -95,7 +95,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.005108474S", "startedAt": "2025-01-23T11:36:04.132670526Z", "finishedAt": "2025-01-23T11:36:04.137779Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 20, @@ -119,7 +119,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.027954894S", "startedAt": "2025-01-23T11:35:53.631082795Z", "finishedAt": "2025-01-23T11:35:53.659037689Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 19, @@ -142,7 +142,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.006903297S", "startedAt": "2025-01-20T11:50:52.874106134Z", "finishedAt": "2025-01-20T11:50:52.881009431Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 18, @@ -171,7 +171,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000481257S", "startedAt": "2025-01-20T11:48:04.92820416Z", "finishedAt": "2025-01-20T11:48:04.928685417Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 17, @@ -194,7 +194,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000407005S", "startedAt": "2025-01-20T11:47:53.509403957Z", "finishedAt": "2025-01-20T11:47:53.509810962Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 16, @@ -217,7 +217,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000403716S", "startedAt": "2025-01-20T11:47:48.430653005Z", "finishedAt": "2025-01-20T11:47:48.431056721Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 15, @@ -240,7 +240,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000417016S", "startedAt": "2025-01-20T11:47:42.429678617Z", "finishedAt": "2025-01-20T11:47:42.430095633Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 14, @@ -264,7 +264,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT12.086284842S", "startedAt": "2025-01-20T11:47:03.092181576Z", "finishedAt": "2025-01-20T11:47:15.178466418Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 13, @@ -296,7 +296,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.011506614S", "startedAt": "2025-01-16T17:18:43.29334923Z", "finishedAt": "2025-01-16T17:18:43.304855844Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 12, @@ -324,7 +324,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007640163S", "startedAt": "2025-01-16T17:02:52.539749853Z", "finishedAt": "2025-01-16T17:02:52.547390016Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 11, @@ -347,7 +347,7 @@ source: 
crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007307840S", "startedAt": "2025-01-16T17:01:14.112756687Z", "finishedAt": "2025-01-16T17:01:14.120064527Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 10, @@ -375,7 +375,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007391353S", "startedAt": "2025-01-16T17:00:29.201180268Z", "finishedAt": "2025-01-16T17:00:29.208571621Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 9, @@ -403,7 +403,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007445825S", "startedAt": "2025-01-16T17:00:15.77629445Z", "finishedAt": "2025-01-16T17:00:15.783740275Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 8, @@ -436,7 +436,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.012020083S", "startedAt": "2025-01-16T16:59:42.744086671Z", "finishedAt": "2025-01-16T16:59:42.756106754Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 7, @@ -463,7 +463,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007440092S", "startedAt": "2025-01-16T16:58:41.2155771Z", "finishedAt": "2025-01-16T16:58:41.223017192Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 6, @@ -490,7 +490,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007565161S", "startedAt": "2025-01-16T16:54:51.940332781Z", "finishedAt": "2025-01-16T16:54:51.947897942Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 5, @@ -516,7 +516,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.016307263S", "startedAt": "2025-01-16T16:53:19.913351957Z", "finishedAt": "2025-01-16T16:53:19.92965922Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 23, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_batchUids_equal_10.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_batchUids_equal_10.snap index 341085c87..7688b3c36 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_batchUids_equal_10.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_batchUids_equal_10.snap @@ -29,7 +29,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 1, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41.snap index 15ae9c34d..78a98cb3d 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41.snap @@ -25,7 +25,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs 
"duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 0, @@ -49,7 +49,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.111055654S", "startedAt": "2025-01-16T16:45:16.020248085Z", "finishedAt": "2025-01-16T16:45:16.131303739Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 2, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41.snap index 15ae9c34d..78a98cb3d 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41.snap @@ -25,7 +25,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 0, @@ -49,7 +49,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.111055654S", "startedAt": "2025-01-16T16:45:16.020248085Z", "finishedAt": "2025-01-16T16:45:16.131303739Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 2, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41.snap index 15ae9c34d..78a98cb3d 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41.snap @@ -25,7 +25,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 0, @@ -49,7 +49,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.111055654S", "startedAt": "2025-01-16T16:45:16.020248085Z", "finishedAt": "2025-01-16T16:45:16.131303739Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 2, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_canceledBy_equal_19.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_canceledBy_equal_19.snap index 04795c285..9dafa709b 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_canceledBy_equal_19.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_canceledBy_equal_19.snap @@ -30,7 +30,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - 
"batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 1, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_statuses_equal_canceled.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_statuses_equal_canceled.snap index 04795c285..9dafa709b 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_statuses_equal_canceled.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_statuses_equal_canceled.snap @@ -30,7 +30,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 1, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_uids_equal_10.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_uids_equal_10.snap index 341085c87..7688b3c36 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_uids_equal_10.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_uids_equal_10.snap @@ -29,7 +29,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 1, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap index 480f85bdb..01d2ea341 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap @@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "canceledBy": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.15.0" + "upgradeTo": "v1.16.0" }, "error": null, "duration": "[duration]", diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap index 480f85bdb..01d2ea341 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap @@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "canceledBy": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.15.0" + "upgradeTo": "v1.16.0" }, "error": null, "duration": "[duration]", diff --git 
a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap index 480f85bdb..01d2ea341 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap @@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "canceledBy": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.15.0" + "upgradeTo": "v1.16.0" }, "error": null, "duration": "[duration]", diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_batch_queue_once_everything_has_been_processed.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_batch_queue_once_everything_has_been_processed.snap index 068dd0d82..fb62b35da 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_batch_queue_once_everything_has_been_processed.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_batch_queue_once_everything_has_been_processed.snap @@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "progress": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.15.0" + "upgradeTo": "v1.16.0" }, "stats": { "totalNbTasks": 1, @@ -24,7 +24,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "[duration]", "startedAt": "[date]", "finishedAt": "[date]", - "batchCreationComplete": "a batch of tasks of type `upgradeDatabase` cannot be batched with any other type of task" + "batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type." 
}, { "uid": 23, @@ -47,7 +47,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.004146631S", "startedAt": "2025-01-23T11:38:57.012591321Z", "finishedAt": "2025-01-23T11:38:57.016737952Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 22, @@ -71,7 +71,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.102738497S", "startedAt": "2025-01-23T11:36:22.551906856Z", "finishedAt": "2025-01-23T11:36:22.654645353Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 21, @@ -95,7 +95,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.005108474S", "startedAt": "2025-01-23T11:36:04.132670526Z", "finishedAt": "2025-01-23T11:36:04.137779Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 20, @@ -119,7 +119,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.027954894S", "startedAt": "2025-01-23T11:35:53.631082795Z", "finishedAt": "2025-01-23T11:35:53.659037689Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 19, @@ -142,7 +142,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.006903297S", "startedAt": "2025-01-20T11:50:52.874106134Z", "finishedAt": "2025-01-20T11:50:52.881009431Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 18, @@ -171,7 +171,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000481257S", "startedAt": "2025-01-20T11:48:04.92820416Z", "finishedAt": "2025-01-20T11:48:04.928685417Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 17, @@ -194,7 +194,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000407005S", "startedAt": "2025-01-20T11:47:53.509403957Z", "finishedAt": "2025-01-20T11:47:53.509810962Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 16, @@ -217,7 +217,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000403716S", "startedAt": "2025-01-20T11:47:48.430653005Z", "finishedAt": "2025-01-20T11:47:48.431056721Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 15, @@ -240,7 +240,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.000417016S", "startedAt": "2025-01-20T11:47:42.429678617Z", "finishedAt": "2025-01-20T11:47:42.430095633Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 14, @@ -264,7 +264,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT12.086284842S", "startedAt": "2025-01-20T11:47:03.092181576Z", "finishedAt": "2025-01-20T11:47:15.178466418Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 13, @@ -296,7 +296,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.011506614S", "startedAt": "2025-01-16T17:18:43.29334923Z", "finishedAt": "2025-01-16T17:18:43.304855844Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 12, @@ -324,7 +324,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007640163S", "startedAt": "2025-01-16T17:02:52.539749853Z", "finishedAt": "2025-01-16T17:02:52.547390016Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 11, @@ -347,7 +347,7 @@ source: 
crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007307840S", "startedAt": "2025-01-16T17:01:14.112756687Z", "finishedAt": "2025-01-16T17:01:14.120064527Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 10, @@ -375,7 +375,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007391353S", "startedAt": "2025-01-16T17:00:29.201180268Z", "finishedAt": "2025-01-16T17:00:29.208571621Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 9, @@ -403,7 +403,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007445825S", "startedAt": "2025-01-16T17:00:15.77629445Z", "finishedAt": "2025-01-16T17:00:15.783740275Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 8, @@ -436,7 +436,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.012020083S", "startedAt": "2025-01-16T16:59:42.744086671Z", "finishedAt": "2025-01-16T16:59:42.756106754Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 7, @@ -463,7 +463,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007440092S", "startedAt": "2025-01-16T16:58:41.2155771Z", "finishedAt": "2025-01-16T16:58:41.223017192Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 6, @@ -490,7 +490,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007565161S", "startedAt": "2025-01-16T16:54:51.940332781Z", "finishedAt": "2025-01-16T16:54:51.947897942Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 5, @@ -516,7 +516,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.016307263S", "startedAt": "2025-01-16T16:53:19.913351957Z", "finishedAt": "2025-01-16T16:53:19.92965922Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 4, @@ -540,7 +540,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.087655941S", "startedAt": "2025-01-16T16:52:32.631145531Z", "finishedAt": "2025-01-16T16:52:32.718801472Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 3, @@ -565,7 +565,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.007593573S", "startedAt": "2025-01-16T16:47:53.677901409Z", "finishedAt": "2025-01-16T16:47:53.685494982Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 2, @@ -591,7 +591,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.017769760S", "startedAt": "2025-01-16T16:47:41.211587682Z", "finishedAt": "2025-01-16T16:47:41.229357442Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 1, @@ -615,7 +615,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.066095506S", "startedAt": "2025-01-16T16:47:10.217299609Z", "finishedAt": "2025-01-16T16:47:10.283395115Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" }, { "uid": 0, @@ -639,7 +639,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "duration": "PT0.111055654S", "startedAt": "2025-01-16T16:45:16.020248085Z", "finishedAt": "2025-01-16T16:45:16.131303739Z", - "batchCreationComplete": "unspecified" + "batchStrategy": "unspecified" } ], "total": 25, diff --git 
a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_task_queue_once_everything_has_been_processed.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_task_queue_once_everything_has_been_processed.snap index 5c409891c..abb4dcdd9 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_task_queue_once_everything_has_been_processed.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_task_queue_once_everything_has_been_processed.snap @@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "canceledBy": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.15.0" + "upgradeTo": "v1.16.0" }, "error": null, "duration": "[duration]", diff --git a/crates/meilisearch/tests/vector/rest.rs b/crates/meilisearch/tests/vector/rest.rs index 82fc71b26..e03563bcc 100644 --- a/crates/meilisearch/tests/vector/rest.rs +++ b/crates/meilisearch/tests/vector/rest.rs @@ -1,7 +1,10 @@ use std::collections::BTreeMap; +use std::sync::atomic::AtomicUsize; +use std::time::Duration; use meili_snap::{json_string, snapshot}; use reqwest::IntoUrl; +use tokio::sync::mpsc; use wiremock::matchers::{method, path}; use wiremock::{Mock, MockServer, Request, ResponseTemplate}; @@ -334,6 +337,41 @@ async fn create_mock_raw() -> (MockServer, Value) { (mock_server, embedder_settings) } +async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (MockServer, Value) { + let mock_server = MockServer::start().await; + let count = AtomicUsize::new(0); + + Mock::given(method("POST")) + .and(path("/")) + .respond_with(move |_req: &Request| { + let count = count.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + + if count >= 5 { + let _ = sender.try_send(()); + ResponseTemplate::new(500) + .set_delay(Duration::from_secs(u64::MAX)) // Make the response hang forever + .set_body_string("Service Unavailable") + } else { + ResponseTemplate::new(500).set_body_string("Service Unavailable") + } + }) + .mount(&mock_server) + .await; + + let url = mock_server.uri(); + + let embedder_settings = json!({ + "source": "rest", + "url": url, + "dimensions": 3, + "request": "{{text}}", + "response": "{{embedding}}", + "documentTemplate": "{{doc.name}}" + }); + + (mock_server, embedder_settings) +} + pub async fn post(url: T, text: &str) -> reqwest::Result { reqwest::Client::builder().build()?.post(url).json(&json!(text)).send().await } @@ -370,13 +408,13 @@ async fn bad_request() { .await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" - { - "message": "Error while generating embeddings: user error: in `request`: \"{{text}}\" not found", - "code": "vector_embedding_error", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#vector_embedding_error" - } - "###); + { + "message": "Error while generating embeddings: user error: in `request`: \"{{text}}\" not found\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); // A repeat string appears inside a repeated value let (response, code) = index @@ -399,7 +437,7 @@ async fn bad_request() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user 
error: in `request.input.input`: \"{{..}}\" appears nested inside of a value that is itself repeated", + "message": "Error while generating embeddings: user error: in `request.input.input`: \"{{..}}\" appears nested inside of a value that is itself repeated\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -422,7 +460,7 @@ async fn bad_request() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user error: in `request.input.repeat`: \"{{..}}\" appears outside of an array", + "message": "Error while generating embeddings: user error: in `request.input.repeat`: \"{{..}}\" appears outside of an array\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -445,7 +483,7 @@ async fn bad_request() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user error: in `request.input`: \"{{..}}\" expected at position #1, but found at position #0", + "message": "Error while generating embeddings: user error: in `request.input`: \"{{..}}\" expected at position #1, but found at position #0\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -468,7 +506,7 @@ async fn bad_request() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user error: in `request.input`: \"{{..}}\" expected at position #1, but found at position #2", + "message": "Error while generating embeddings: user error: in `request.input`: \"{{..}}\" expected at position #1, but found at position #2\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -491,7 +529,7 @@ async fn bad_request() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user error: in `request.input[0]`: Expected \"{{text}}\" inside of the repeated value", + "message": "Error while generating embeddings: user error: in `request.input[0]`: Expected \"{{text}}\" inside of the repeated value\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -518,7 +556,7 @@ async fn bad_request() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user error: in `request.data`: Found \"{{..}}\", but it was already present in `request.input`", + "message": "Error while generating embeddings: user error: in `request.data`: Found \"{{..}}\", but it was already present in `request.input`\n 
- Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -539,7 +577,7 @@ async fn bad_request() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user error: in `request.data`: Found \"{{text}}\", but it was already present in `request.input`", + "message": "Error while generating embeddings: user error: in `request.data`: Found \"{{text}}\", but it was already present in `request.input`\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -560,7 +598,7 @@ async fn bad_request() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user error: in `request.repeated.data[1]`: Found \"{{text}}\", but it was already present in `request.repeated.input`", + "message": "Error while generating embeddings: user error: in `request.repeated.data[1]`: Found \"{{text}}\", but it was already present in `request.repeated.input`\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -581,7 +619,7 @@ async fn bad_request() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user error: in `request.data`: Found \"{{text}}\", but it was already present in `request.input[0]` (repeated)", + "message": "Error while generating embeddings: user error: in `request.data`: Found \"{{text}}\", but it was already present in `request.input[0]` (repeated)\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -882,7 +920,7 @@ async fn bad_settings() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user error: in `request`: \"{{text}}\" not found", + "message": "Error while generating embeddings: user error: in `request`: \"{{text}}\" not found\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -2111,3 +2149,71 @@ async fn searchable_reindex() { } "###); } + +#[actix_rt::test] +async fn last_error_stats() { + let (sender, mut receiver) = mpsc::channel(10); + let (_mock, setting) = create_faulty_mock_raw(sender).await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, 
"name": "will_return_500"}, + {"id": 1, "name": "will_error"}, + {"id": 2, "name": "must_error"}, + ]); + let (_value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + + // The task will eventually fail, so let's not wait for it. + // Let's just wait for the server's signal + receiver.recv().await; + + let (response, _code) = index.filtered_batches(&[], &[], &[]).await; + snapshot!(json_string!(response["results"][0], { + ".progress" => "[ignored]", + ".stats.embedderRequests.total" => "[ignored]", + ".stats.embedderRequests.failed" => "[ignored]", + ".startedAt" => "[ignored]" + }), @r#" + { + "uid": 1, + "progress": "[ignored]", + "details": { + "receivedDocuments": 3, + "indexedDocuments": null + }, + "stats": { + "totalNbTasks": 1, + "status": { + "processing": 1 + }, + "types": { + "documentAdditionOrUpdate": 1 + }, + "indexUids": { + "doggo": 1 + }, + "embedderRequests": { + "total": "[ignored]", + "failed": "[ignored]", + "lastError": "runtime error: received internal error HTTP 500 from embedding server\n - server replied with `Service Unavailable`" + } + }, + "duration": null, + "startedAt": "[ignored]", + "finishedAt": null, + "batchStrategy": "batched all enqueued tasks" + } + "#); +} diff --git a/crates/meilitool/Cargo.toml b/crates/meilitool/Cargo.toml index 485177838..722f5f82b 100644 --- a/crates/meilitool/Cargo.toml +++ b/crates/meilitool/Cargo.toml @@ -9,15 +9,15 @@ edition.workspace = true license.workspace = true [dependencies] -anyhow = "1.0.95" -clap = { version = "4.5.24", features = ["derive"] } +anyhow = "1.0.98" +clap = { version = "4.5.40", features = ["derive"] } dump = { path = "../dump" } file-store = { path = "../file-store" } -indexmap = { version = "2.7.0", features = ["serde"] } +indexmap = { version = "2.9.0", features = ["serde"] } meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -serde = { version = "1.0.217", features = ["derive"] } -serde_json = { version = "1.0.135", features = ["preserve_order"] } -tempfile = "3.15.0" -time = { version = "0.3.37", features = ["formatting", "parsing", "alloc"] } -uuid = { version = "1.11.0", features = ["v4"], default-features = false } +serde = { version = "1.0.219", features = ["derive"] } +serde_json = { version = "1.0.140", features = ["preserve_order"] } +tempfile = "3.20.0" +time = { version = "0.3.41", features = ["formatting", "parsing", "alloc"] } +uuid = { version = "1.17.0", features = ["v4"], default-features = false } diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index dd1213782..b967e620c 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -545,7 +545,6 @@ fn export_documents( let rtxn = index.read_txn()?; let fields_ids_map = index.fields_ids_map(&rtxn)?; let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - let embedding_configs = index.embedding_configs(&rtxn)?; if let Some(offset) = offset { eprintln!("Skipping {offset} documents"); @@ -592,17 +591,12 @@ fn export_documents( .into()); }; - for (embedder_name, embeddings) in embeddings { - let user_provided = embedding_configs - .iter() - .find(|conf| conf.name == embedder_name) - .is_some_and(|conf| conf.user_provided.contains(id)); - + for (embedder_name, (embeddings, regenerate)) in embeddings { let embeddings = ExplicitVectors { embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( embeddings, )), - regenerate: !user_provided, + regenerate, }; vectors 
.insert(embedder_name, serde_json::to_value(embeddings).unwrap()); diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index 909a5f8f9..3d08252ac 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -15,15 +15,15 @@ license.workspace = true big_s = "1.0.2" bimap = { version = "0.6.3", features = ["serde"] } bincode = "1.3.3" -bstr = "1.11.3" -bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] } +bstr = "1.12.0" +bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] } byteorder = "1.5.0" -charabia = { version = "0.9.3", default-features = false } +charabia = { version = "0.9.6", default-features = false } concat-arrays = "0.1.2" -convert_case = "0.6.0" +convert_case = "0.8.0" crossbeam-channel = "0.5.15" deserr = "0.6.3" -either = { version = "1.13.0", features = ["serde"] } +either = { version = "1.15.0", features = ["serde"] } flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" fxhash = "0.2.1" @@ -36,32 +36,32 @@ heed = { version = "0.22.0", default-features = false, features = [ "serde-json", "serde-bincode", ] } -indexmap = { version = "2.7.0", features = ["serde"] } +indexmap = { version = "2.9.0", features = ["serde"] } json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } -memchr = "2.7.4" +memchr = "2.7.5" memmap2 = "0.9.5" obkv = "0.3.0" -once_cell = "1.20.2" -ordered-float = "4.6.0" +once_cell = "1.21.3" +ordered-float = "5.0.0" rayon = "1.10.0" -roaring = { version = "0.10.10", features = ["serde"] } +roaring = { version = "0.10.12", features = ["serde"] } rstar = { version = "0.12.2", features = ["serde"] } -serde = { version = "1.0.217", features = ["derive"] } -serde_json = { version = "1.0.135", features = ["preserve_order", "raw_value"] } +serde = { version = "1.0.219", features = ["derive"] } +serde_json = { version = "1.0.140", features = ["preserve_order", "raw_value"] } slice-group-by = "0.3.1" smallstr = { version = "0.3.0", features = ["serde"] } -smallvec = "1.13.2" +smallvec = "1.15.1" smartstring = "1.0.1" -tempfile = "3.15.0" -thiserror = "2.0.9" -time = { version = "0.3.37", features = [ +tempfile = "3.20.0" +thiserror = "2.0.12" +time = { version = "0.3.41", features = [ "serde-well-known", "formatting", "parsing", "macros", ] } -uuid = { version = "1.11.0", features = ["v4"] } +uuid = { version = "1.17.0", features = ["v4"] } filter-parser = { path = "../filter-parser" } @@ -69,18 +69,18 @@ filter-parser = { path = "../filter-parser" } itertools = "0.14.0" csv = "1.3.1" -candle-core = { version = "0.8.2" } -candle-transformers = { version = "0.8.2" } -candle-nn = { version = "0.8.2" } +candle-core = { version = "0.9.1" } +candle-transformers = { version = "0.9.1" } +candle-nn = { version = "0.9.1" } tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [ "onig", ] } hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [ "online", ] } -tiktoken-rs = "0.6.0" -liquid = "0.26.9" -rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [ +tiktoken-rs = "0.7.0" +liquid = "0.26.11" +rhai = { version = "1.22.2", features = [ "serde", "no_module", "no_custom_syntax", @@ -92,28 +92,26 @@ rand = "0.8.5" tracing = "0.1.41" ureq = { version = "2.12.1", features = ["json"] } url = "2.5.4" -rayon-par-bridge = "0.1.0" 
-hashbrown = "0.15.2" -bumpalo = "3.16.0" +hashbrown = "0.15.4" +bumpalo = "3.18.1" bumparaw-collections = "0.1.4" -thread_local = "1.1.8" -allocator-api2 = "0.2.21" -rustc-hash = "2.1.0" -uell = "0.1.0" +thread_local = "1.1.9" +allocator-api2 = "0.3.0" +rustc-hash = "2.1.1" enum-iterator = "2.1.0" bbqueue = { git = "https://github.com/meilisearch/bbqueue" } flume = { version = "0.11.1", default-features = false } -utoipa = { version = "5.3.1", features = [ +utoipa = { version = "5.4.0", features = [ "non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions", ] } -lru = "0.13.0" +lru = "0.14.0" [dev-dependencies] -mimalloc = { version = "0.1.43", default-features = false } +mimalloc = { version = "0.1.47", default-features = false } # fixed version due to format breakages in v1.40 insta = "=1.39.0" maplit = "1.0.2" diff --git a/crates/milli/src/database_stats.rs b/crates/milli/src/database_stats.rs index 7da1fbd2b..381408621 100644 --- a/crates/milli/src/database_stats.rs +++ b/crates/milli/src/database_stats.rs @@ -1,9 +1,6 @@ use std::mem; -use heed::Database; -use heed::DatabaseStat; -use heed::RoTxn; -use heed::Unspecified; +use heed::{Database, DatabaseStat, RoTxn, Unspecified}; use serde::{Deserialize, Serialize}; use crate::BEU32; diff --git a/crates/milli/src/disabled_typos_terms.rs b/crates/milli/src/disabled_typos_terms.rs index 3a0d0c0f5..c5acad7cd 100644 --- a/crates/milli/src/disabled_typos_terms.rs +++ b/crates/milli/src/disabled_typos_terms.rs @@ -1,10 +1,9 @@ -use heed::{ - types::{SerdeJson, Str}, - RoTxn, RwTxn, -}; +use heed::types::{SerdeJson, Str}; +use heed::{RoTxn, RwTxn}; use serde::{Deserialize, Serialize}; -use crate::{index::main_key, Index}; +use crate::index::main_key; +use crate::Index; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] #[serde(rename_all = "camelCase")] @@ -33,13 +32,6 @@ impl Index { Ok(()) } - - pub(crate) fn delete_disabled_typos_terms(&self, txn: &mut RwTxn<'_>) -> heed::Result<()> { - self.main - .remap_types::>() - .delete(txn, main_key::DISABLED_TYPOS_TERMS)?; - Ok(()) - } } impl DisabledTyposTerms { diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 237a895d3..f8886da8e 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -1,5 +1,4 @@ -use std::collections::BTreeSet; -use std::collections::HashMap; +use std::collections::{BTreeSet, HashMap}; use std::convert::Infallible; use std::fmt::Write; use std::{io, str}; @@ -289,6 +288,8 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError), #[error("Too many embedders in the configuration. Found {0}, but limited to 256.")] TooManyEmbedders(usize), + #[error("Too many fragments in the configuration. 
Found {0}, but limited to 256.")] + TooManyFragments(usize), #[error("Cannot find embedder with name `{0}`.")] InvalidSearchEmbedder(String), #[error("Cannot find embedder with name `{0}`.")] @@ -387,6 +388,8 @@ and can not be more than 511 bytes.", .document_id.to_string() DocumentEditionRuntimeError(Box), #[error("Document edition runtime error encountered while compiling the function: {0}")] DocumentEditionCompilationError(rhai::ParseError), + #[error("`.chat.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")] + InvalidChatSettingsDocumentTemplateMaxBytes, #[error("{0}")] DocumentEmbeddingError(String), } diff --git a/crates/milli/src/external_documents_ids.rs b/crates/milli/src/external_documents_ids.rs index 755b801ec..598465e5f 100644 --- a/crates/milli/src/external_documents_ids.rs +++ b/crates/milli/src/external_documents_ids.rs @@ -32,13 +32,13 @@ impl ExternalDocumentsIds { &self, rtxn: &RoTxn<'_>, external_id: A, - ) -> heed::Result> { + ) -> heed::Result> { self.0.get(rtxn, external_id.as_ref()) } /// An helper function to debug this type, returns an `HashMap` of both, /// soft and hard fst maps, combined. - pub fn to_hash_map(&self, rtxn: &RoTxn<'_>) -> heed::Result> { + pub fn to_hash_map(&self, rtxn: &RoTxn<'_>) -> heed::Result> { let mut map = HashMap::default(); for result in self.0.iter(rtxn)? { let (external, internal) = result?; diff --git a/crates/milli/src/fields_ids_map.rs b/crates/milli/src/fields_ids_map.rs index 9a016e7bd..d2abd840c 100644 --- a/crates/milli/src/fields_ids_map.rs +++ b/crates/milli/src/fields_ids_map.rs @@ -7,6 +7,7 @@ use crate::FieldId; mod global; pub mod metadata; pub use global::GlobalFieldsIdsMap; +pub use metadata::{FieldIdMapWithMetadata, MetadataBuilder}; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FieldsIdsMap { diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index 53af30fd6..ae1a9755a 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -1,13 +1,12 @@ +use std::collections::{BTreeSet, HashSet}; + use deserr::{DeserializeError, Deserr, ValuePointerRef}; use serde::{Deserialize, Serialize}; -use std::collections::{BTreeSet, HashSet}; use utoipa::ToSchema; -use crate::{ - attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch}, - constants::RESERVED_GEO_FIELD_NAME, - AttributePatterns, -}; +use crate::attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch}; +use crate::constants::RESERVED_GEO_FIELD_NAME; +use crate::AttributePatterns; #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)] #[serde(untagged)] diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index d0cd5c862..b2ec992ba 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1,14 +1,18 @@ use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::error::Error; +use std::fmt; use std::fs::File; use std::path::Path; +use deserr::Deserr; use heed::types::*; use heed::{CompactionOption, Database, DatabaseStat, RoTxn, RwTxn, Unspecified, WithoutTls}; use indexmap::IndexMap; use roaring::RoaringBitmap; use rstar::RTree; use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; use crate::constants::{self, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; use crate::database_stats::DatabaseStats; @@ -23,8 +27,11 @@ use crate::heed_codec::facet::{ use 
crate::heed_codec::version::VersionCodec; use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; use crate::order_by_map::OrderByMap; +use crate::prompt::PromptData; use crate::proximity::ProximityPrecision; -use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig}; +use crate::update::new::StdResult; +use crate::vector::db::IndexEmbeddingConfigs; +use crate::vector::{ArroyStats, ArroyWrapper, Embedding}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, @@ -79,6 +86,7 @@ pub mod main_key { pub const PREFIX_SEARCH: &str = "prefix_search"; pub const DOCUMENTS_STATS: &str = "documents_stats"; pub const DISABLED_TYPOS_TERMS: &str = "disabled_typos_terms"; + pub const CHAT: &str = "chat"; } pub mod db_name { @@ -170,7 +178,7 @@ pub struct Index { pub field_id_docid_facet_strings: Database, /// Maps an embedder name to its id in the arroy store. - pub embedder_category_id: Database, + pub(crate) embedder_category_id: Database, /// Vector store based on arroy™. pub vector_arroy: arroy::Database, @@ -1691,6 +1699,25 @@ impl Index { self.main.remap_key_type::().delete(txn, main_key::FACET_SEARCH) } + pub fn chat_config(&self, txn: &RoTxn<'_>) -> heed::Result { + self.main + .remap_types::>() + .get(txn, main_key::CHAT) + .map(|o| o.unwrap_or_default()) + } + + pub(crate) fn put_chat_config( + &self, + txn: &mut RwTxn<'_>, + val: &ChatConfig, + ) -> heed::Result<()> { + self.main.remap_types::>().put(txn, main_key::CHAT, &val) + } + + pub(crate) fn delete_chat_config(&self, txn: &mut RwTxn<'_>) -> heed::Result { + self.main.remap_key_type::().delete(txn, main_key::CHAT) + } + pub fn localized_attributes_rules( &self, rtxn: &RoTxn<'_>, @@ -1719,34 +1746,6 @@ impl Index { self.main.remap_key_type::().delete(txn, main_key::LOCALIZED_ATTRIBUTES_RULES) } - /// Put the embedding configs: - /// 1. The name of the embedder - /// 2. The configuration option for this embedder - /// 3. The list of documents with a user provided embedding - pub(crate) fn put_embedding_configs( - &self, - wtxn: &mut RwTxn<'_>, - configs: Vec, - ) -> heed::Result<()> { - self.main.remap_types::>>().put( - wtxn, - main_key::EMBEDDING_CONFIGS, - &configs, - ) - } - - pub(crate) fn delete_embedding_configs(&self, wtxn: &mut RwTxn<'_>) -> heed::Result { - self.main.remap_key_type::().delete(wtxn, main_key::EMBEDDING_CONFIGS) - } - - pub fn embedding_configs(&self, rtxn: &RoTxn<'_>) -> Result> { - Ok(self - .main - .remap_types::>>() - .get(rtxn, main_key::EMBEDDING_CONFIGS)? 
-            .unwrap_or_default())
-    }
-
     pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
         self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
     }
@@ -1759,19 +1758,29 @@ impl Index {
         self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCH_CUTOFF)
     }
 
+    pub fn embedding_configs(&self) -> IndexEmbeddingConfigs {
+        IndexEmbeddingConfigs::new(self.main, self.embedder_category_id)
+    }
+
     pub fn embeddings(
         &self,
         rtxn: &RoTxn<'_>,
         docid: DocumentId,
-    ) -> Result<BTreeMap<String, Vec<Embedding>>> {
+    ) -> Result<BTreeMap<String, (Vec<Embedding>, bool)>> {
         let mut res = BTreeMap::new();
-        let embedding_configs = self.embedding_configs(rtxn)?;
-        for config in embedding_configs {
-            let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
-            let reader =
-                ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
+        let embedders = self.embedding_configs();
+        for config in embedders.embedding_configs(rtxn)? {
+            let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
+            let reader = ArroyWrapper::new(
+                self.vector_arroy,
+                embedder_info.embedder_id,
+                config.config.quantized(),
+            );
             let embeddings = reader.item_vectors(rtxn, docid)?;
-            res.insert(config.name.to_owned(), embeddings);
+            res.insert(
+                config.name.to_owned(),
+                (embeddings, embedder_info.embedding_status.must_regenerate(docid)),
+            );
         }
         Ok(res)
     }
@@ -1783,9 +1792,9 @@ impl Index {
     pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> {
         let mut stats = ArroyStats::default();
-        let embedding_configs = self.embedding_configs(rtxn)?;
-        for config in embedding_configs {
-            let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
+        let embedding_configs = self.embedding_configs();
+        for config in embedding_configs.embedding_configs(rtxn)? {
+            let embedder_id = embedding_configs.embedder_id(rtxn, &config.name)?.unwrap();
             let reader =
                 ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
             reader.aggregate_stats(rtxn, &mut stats)?;
@@ -1910,20 +1919,99 @@ impl Index {
     }
 }
 
-#[derive(Debug, Deserialize, Serialize)]
-pub struct IndexEmbeddingConfig {
-    pub name: String,
-    pub config: EmbeddingConfig,
-    pub user_provided: RoaringBitmap,
+#[derive(Debug, Default, Deserialize, Serialize)]
+pub struct ChatConfig {
+    pub description: String,
+    /// Contains the document template and max template length.
+    pub prompt: PromptData,
+    pub search_parameters: SearchParameters,
+}
+
+#[derive(Debug, Default, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct SearchParameters {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub hybrid: Option<HybridQuery>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub limit: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub sort: Option<Vec<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub distinct: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub matching_strategy: Option<MatchingStrategy>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub attributes_to_search_on: Option<Vec<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ranking_score_threshold: Option<RankingScoreThreshold>,
+}
+
+#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Deserr, ToSchema)]
+#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSettingsRankingScoreThreshold)]
+pub struct RankingScoreThreshold(f64);
+
+impl RankingScoreThreshold {
+    pub fn as_f64(&self) -> f64 {
+        self.0
+    }
+}
+
+impl TryFrom<f64> for RankingScoreThreshold {
+    type Error = InvalidSettingsRankingScoreThreshold;
+
+    fn try_from(value: f64) -> StdResult<Self, Self::Error> {
+        if !(0.0..=1.0).contains(&value) {
+            Err(InvalidSettingsRankingScoreThreshold)
+        } else {
+            Ok(RankingScoreThreshold(value))
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct InvalidSettingsRankingScoreThreshold;
+
+impl Error for InvalidSettingsRankingScoreThreshold {}
+
+impl fmt::Display for InvalidSettingsRankingScoreThreshold {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`."
+        )
+    }
+}
+
+#[derive(Debug, Clone, Default, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct HybridQuery {
+    pub semantic_ratio: f32,
+    pub embedder: String,
 }
 
 #[derive(Debug, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
 pub struct PrefixSettings {
     pub prefix_count_threshold: usize,
     pub max_prefix_length: usize,
     pub compute_prefixes: PrefixSearch,
 }
 
+/// This is unfortunately a duplication of the struct in .
+/// The reason why it is duplicated is because milli cannot depend on meilisearch. It would be cyclic imports.
+#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema, Serialize, Deserialize)]
+#[deserr(rename_all = camelCase)]
+#[serde(rename_all = "camelCase")]
+pub enum MatchingStrategy {
+    /// Remove query words from last to first
+    #[default]
+    Last,
+    /// All query words are mandatory
+    All,
+    /// Remove query words from the most frequent to the least
+    Frequency,
+}
+
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
 #[serde(rename_all = "camelCase")]
 pub enum PrefixSearch {
diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs
index 47d3dc75c..504b4c68d 100644
--- a/crates/milli/src/lib.rs
+++ b/crates/milli/src/lib.rs
@@ -52,18 +52,19 @@ pub use search::new::{
 };
 use serde_json::Value;
 pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
-pub use {charabia as tokenizer, heed, rhai};
+pub use {arroy, charabia as tokenizer, heed, rhai};
 
 pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
-pub use self::attribute_patterns::AttributePatterns;
-pub use self::attribute_patterns::PatternMatch;
+pub use self::attribute_patterns::{AttributePatterns, PatternMatch};
 pub use self::criterion::{default_criteria, Criterion, CriterionError};
 pub use self::error::{
     Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
 };
 pub use self::external_documents_ids::ExternalDocumentsIds;
 pub use self::fieldids_weights_map::FieldidsWeightsMap;
-pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap};
+pub use self::fields_ids_map::{
+    FieldIdMapWithMetadata, FieldsIdsMap, GlobalFieldsIdsMap, MetadataBuilder,
+};
 pub use self::filterable_attributes_rules::{
     FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns,
     FilterableAttributesRule,
@@ -84,8 +85,6 @@ pub use self::search::{
 };
 pub use self::update::ChannelCongestion;
 
-pub use arroy;
-
 pub type Result<T> = std::result::Result<T, Error>;
 
 pub type Attribute = u32;
diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs
index 75dafa8ec..61c61cd49 100644
--- a/crates/milli/src/progress.rs
+++ b/crates/milli/src/progress.rs
@@ -1,11 +1,11 @@
-use enum_iterator::Sequence;
 use std::any::TypeId;
 use std::borrow::Cow;
 use std::marker::PhantomData;
-use std::sync::atomic::{AtomicU32, Ordering};
+use std::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
 use std::sync::{Arc, RwLock};
 use std::time::{Duration, Instant};
 
+use enum_iterator::Sequence;
 use indexmap::IndexMap;
 use itertools::Itertools;
 use serde::Serialize;
@@ -22,6 +22,25 @@ pub struct Progress {
     steps: Arc<RwLock<InnerProgress>>,
 }
 
+#[derive(Default)]
+pub struct EmbedderStats {
+    pub errors: Arc<RwLock<(Option<String>, u32)>>,
+    pub total_count: AtomicUsize,
+}
+
+impl std::fmt::Debug for EmbedderStats {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let guard = self.errors.read().unwrap_or_else(|p| p.into_inner());
+        let (error, count) = (guard.0.clone(), guard.1);
+        std::mem::drop(guard);
+        f.debug_struct("EmbedderStats")
+            .field("last_error", &error)
+            .field("total_count", &self.total_count.load(Ordering::Relaxed))
+            .field("error_count", &count)
+            .finish()
+    }
+}
+
 #[derive(Default)]
 struct InnerProgress {
     /// The hierarchy of steps.
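A note on the `EmbedderStats` type added above: its hand-written `Debug` impl deliberately recovers from lock poisoning and releases the guard before formatting anything. Below is a minimal, self-contained sketch of that pattern (an illustrative stand-in, not the actual milli type):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};

// Stand-in for the stats structure: (last error message, error count) plus a total counter.
#[derive(Default)]
struct Stats {
    errors: Arc<RwLock<(Option<String>, u32)>>,
    total_count: AtomicUsize,
}

impl std::fmt::Debug for Stats {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A poisoned lock still holds valid data; `into_inner` recovers the guard.
        let guard = self.errors.read().unwrap_or_else(|poisoned| poisoned.into_inner());
        // Copy the data out and drop the guard before formatting, so a panicking
        // formatter cannot hold (or re-poison) the lock.
        let (last_error, error_count) = (guard.0.clone(), guard.1);
        drop(guard);
        f.debug_struct("Stats")
            .field("last_error", &last_error)
            .field("total_count", &self.total_count.load(Ordering::Relaxed))
            .field("error_count", &error_count)
            .finish()
    }
}

fn main() {
    let stats = Stats::default();
    stats.total_count.fetch_add(1, Ordering::Relaxed);
    *stats.errors.write().unwrap() = (Some("timeout".into()), 1);
    println!("{stats:?}");
}
```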
diff --git a/crates/milli/src/prompt/context.rs b/crates/milli/src/prompt/context.rs index 84523333a..8958cb693 100644 --- a/crates/milli/src/prompt/context.rs +++ b/crates/milli/src/prompt/context.rs @@ -6,12 +6,18 @@ use liquid::{ObjectView, ValueView}; #[derive(Debug, Clone)] pub struct Context<'a, D: ObjectView, F: ArrayView> { document: &'a D, - fields: &'a F, + fields: Option<&'a F>, } impl<'a, D: ObjectView, F: ArrayView> Context<'a, D, F> { pub fn new(document: &'a D, fields: &'a F) -> Self { - Self { document, fields } + Self { document, fields: Some(fields) } + } +} + +impl<'a, D: ObjectView> Context<'a, D, Vec> { + pub fn without_fields(document: &'a D) -> Self { + Self { document, fields: None } } } @@ -21,17 +27,27 @@ impl ObjectView for Context<'_, D, F> { } fn size(&self) -> i64 { - 2 + if self.fields.is_some() { + 2 + } else { + 1 + } } fn keys<'k>(&'k self) -> Box> + 'k> { - Box::new(["doc", "fields"].iter().map(|s| KStringCow::from_static(s))) + let keys = if self.fields.is_some() { + either::Either::Left(["doc", "fields"]) + } else { + either::Either::Right(["doc"]) + }; + + Box::new(keys.into_iter().map(KStringCow::from_static)) } fn values<'k>(&'k self) -> Box + 'k> { Box::new( std::iter::once(self.document.as_value()) - .chain(std::iter::once(self.fields.as_value())), + .chain(self.fields.iter().map(|fields| fields.as_value())), ) } @@ -40,13 +56,13 @@ impl ObjectView for Context<'_, D, F> { } fn contains_key(&self, index: &str) -> bool { - index == "doc" || index == "fields" + index == "doc" || (index == "fields" && self.fields.is_some()) } fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> { - match index { - "doc" => Some(self.document.as_value()), - "fields" => Some(self.fields.as_value()), + match (index, &self.fields) { + ("doc", _) => Some(self.document.as_value()), + ("fields", Some(fields)) => Some(fields.as_value()), _ => None, } } diff --git a/crates/milli/src/prompt/document.rs b/crates/milli/src/prompt/document.rs index b00c4cb42..1125c8fba 100644 --- a/crates/milli/src/prompt/document.rs +++ b/crates/milli/src/prompt/document.rs @@ -144,18 +144,19 @@ impl ValueView for Document<'_> { use crate::update::new::document::Document as DocumentTrait; #[derive(Debug)] -pub struct ParseableDocument<'doc, D> { +pub struct ParseableDocument<'a, 'doc, D: DocumentTrait<'a> + Debug> { document: D, doc_alloc: &'doc Bump, + _marker: std::marker::PhantomData<&'a ()>, } -impl<'doc, D> ParseableDocument<'doc, D> { +impl<'a, 'doc, D: DocumentTrait<'a> + Debug> ParseableDocument<'a, 'doc, D> { pub fn new(document: D, doc_alloc: &'doc Bump) -> Self { - Self { document, doc_alloc } + Self { document, doc_alloc, _marker: std::marker::PhantomData } } } -impl<'doc, D: DocumentTrait<'doc> + Debug> ObjectView for ParseableDocument<'doc, D> { +impl<'a, D: DocumentTrait<'a> + Debug> ObjectView for ParseableDocument<'a, '_, D> { fn as_value(&self) -> &dyn ValueView { self } @@ -195,7 +196,7 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ObjectView for ParseableDocument<'doc } } -impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, D> { +impl<'a, D: DocumentTrait<'a> + Debug> ValueView for ParseableDocument<'a, '_, D> { fn as_debug(&self) -> &dyn Debug { self } diff --git a/crates/milli/src/prompt/error.rs b/crates/milli/src/prompt/error.rs index a92e2fdc3..03f3fb8a8 100644 --- a/crates/milli/src/prompt/error.rs +++ b/crates/milli/src/prompt/error.rs @@ -18,6 +18,7 @@ impl NewPromptError { Self { kind: 
NewPromptErrorKind::CannotParseTemplate(inner), fault: FaultSource::User } } + #[allow(unused)] // See for explanation pub(crate) fn invalid_fields_in_template(inner: liquid::Error) -> NewPromptError { Self { kind: NewPromptErrorKind::InvalidFieldsInTemplate(inner), fault: FaultSource::User } } @@ -27,6 +28,7 @@ impl NewPromptError { pub enum NewPromptErrorKind { #[error("cannot parse template: {0}")] CannotParseTemplate(liquid::Error), + #[allow(unused)] // See for explanation #[error("template contains invalid fields: {0}. Only `doc.*`, `fields[i].name`, `fields[i].value` are supported")] InvalidFieldsInTemplate(liquid::Error), } diff --git a/crates/milli/src/prompt/fields.rs b/crates/milli/src/prompt/fields.rs index 8d006f0b7..5a842268c 100644 --- a/crates/milli/src/prompt/fields.rs +++ b/crates/milli/src/prompt/fields.rs @@ -121,10 +121,10 @@ impl ObjectView for FieldValue<'_, D> { pub struct OwnedFields<'a, D: ObjectView>(Vec>); #[derive(Debug)] -pub struct BorrowedFields<'a, 'map, D: ObjectView> { +pub struct BorrowedFields<'a, 'doc, 'map, D: ObjectView> { document: &'a D, field_id_map: &'a RefCell>, - doc_alloc: &'a Bump, + doc_alloc: &'doc Bump, } impl<'a, D: ObjectView> OwnedFields<'a, D> { @@ -138,11 +138,11 @@ impl<'a, D: ObjectView> OwnedFields<'a, D> { } } -impl<'a, 'map, D: ObjectView> BorrowedFields<'a, 'map, D> { +impl<'a, 'doc, 'map, D: ObjectView> BorrowedFields<'a, 'doc, 'map, D> { pub fn new( document: &'a D, field_id_map: &'a RefCell>, - doc_alloc: &'a Bump, + doc_alloc: &'doc Bump, ) -> Self { Self { document, field_id_map, doc_alloc } } @@ -170,7 +170,7 @@ impl ArrayView for OwnedFields<'_, D> { } } -impl ArrayView for BorrowedFields<'_, '_, D> { +impl ArrayView for BorrowedFields<'_, '_, '_, D> { fn as_value(&self) -> &dyn ValueView { self } @@ -212,7 +212,7 @@ impl ArrayView for BorrowedFields<'_, '_, D> { } } -impl ValueView for BorrowedFields<'_, '_, D> { +impl ValueView for BorrowedFields<'_, '_, '_, D> { fn as_debug(&self) -> &dyn std::fmt::Debug { self } @@ -288,11 +288,11 @@ impl ValueView for OwnedFields<'_, D> { } } -struct ArraySource<'a, 'map, D: ObjectView> { - s: &'a BorrowedFields<'a, 'map, D>, +struct ArraySource<'a, 'doc, 'map, D: ObjectView> { + s: &'a BorrowedFields<'a, 'doc, 'map, D>, } -impl fmt::Display for ArraySource<'_, '_, D> { +impl fmt::Display for ArraySource<'_, '_, '_, D> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "[")?; for item in self.s.values() { @@ -303,11 +303,11 @@ impl fmt::Display for ArraySource<'_, '_, D> { } } -struct ArrayRender<'a, 'map, D: ObjectView> { - s: &'a BorrowedFields<'a, 'map, D>, +struct ArrayRender<'a, 'doc, 'map, D: ObjectView> { + s: &'a BorrowedFields<'a, 'doc, 'map, D>, } -impl fmt::Display for ArrayRender<'_, '_, D> { +impl fmt::Display for ArrayRender<'_, '_, '_, D> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for item in self.s.values() { write!(f, "{}", item.render())?; diff --git a/crates/milli/src/prompt/mod.rs b/crates/milli/src/prompt/mod.rs index a5cb8de48..03b20a090 100644 --- a/crates/milli/src/prompt/mod.rs +++ b/crates/milli/src/prompt/mod.rs @@ -2,7 +2,6 @@ mod context; mod document; pub(crate) mod error; mod fields; -mod template_checker; use std::cell::RefCell; use std::convert::TryFrom; @@ -10,12 +9,11 @@ use std::fmt::Debug; use std::num::NonZeroUsize; use bumpalo::Bump; -use document::ParseableDocument; +pub(crate) use document::{Document, ParseableDocument}; use error::{NewPromptError, RenderPromptError}; -use fields::{BorrowedFields, 
OwnedFields}; +pub use fields::{BorrowedFields, OwnedFields}; -use self::context::Context; -use self::document::Document; +pub use self::context::Context; use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; use crate::update::del_add::DelAdd; use crate::GlobalFieldsIdsMap; @@ -65,7 +63,7 @@ fn default_template() -> liquid::Template { new_template(default_template_text()).unwrap() } -fn default_template_text() -> &'static str { +pub fn default_template_text() -> &'static str { "{% for field in fields %}\ {% if field.is_searchable and field.value != nil %}\ {{ field.name }}: {{ field.value }}\n\ @@ -105,17 +103,12 @@ impl Prompt { max_bytes, }; - // render template with special object that's OK with `doc.*` and `fields.*` - this.template - .render(&template_checker::TemplateChecker) - .map_err(NewPromptError::invalid_fields_in_template)?; - Ok(this) } pub fn render_document< - 'a, // lifetime of the borrow of the document - 'doc: 'a, // lifetime of the allocator, will live for an entire chunk of documents + 'a, // lifetime of the borrow of the document + 'doc, // lifetime of the allocator, will live for an entire chunk of documents >( &self, external_docid: &str, @@ -206,6 +199,7 @@ mod test { } #[test] + #[ignore] // See for explanation fn template_missing_doc() { assert!(matches!( Prompt::new("{{title}}: {{overview}}".into(), None), @@ -236,6 +230,7 @@ mod test { } #[test] + #[ignore] // See for explanation fn template_fields_invalid() { assert!(matches!( // intentionally garbled field diff --git a/crates/milli/src/prompt/template_checker.rs b/crates/milli/src/prompt/template_checker.rs deleted file mode 100644 index 4cda4a70d..000000000 --- a/crates/milli/src/prompt/template_checker.rs +++ /dev/null @@ -1,301 +0,0 @@ -use liquid::model::{ - ArrayView, DisplayCow, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue, -}; -use liquid::{Object, ObjectView, ValueView}; - -#[derive(Debug)] -pub struct TemplateChecker; - -#[derive(Debug)] -pub struct DummyDoc; - -#[derive(Debug)] -pub struct DummyFields; - -#[derive(Debug)] -pub struct DummyField; - -const DUMMY_VALUE: &LiquidValue = &LiquidValue::Nil; - -impl ObjectView for DummyField { - fn as_value(&self) -> &dyn ValueView { - self - } - - fn size(&self) -> i64 { - 2 - } - - fn keys<'k>(&'k self) -> Box> + 'k> { - Box::new(["name", "value"].iter().map(|s| KStringCow::from_static(s))) - } - - fn values<'k>(&'k self) -> Box + 'k> { - Box::new(vec![DUMMY_VALUE.as_view(), DUMMY_VALUE.as_view()].into_iter()) - } - - fn iter<'k>(&'k self) -> Box, &'k dyn ValueView)> + 'k> { - Box::new(self.keys().zip(self.values())) - } - - fn contains_key(&self, index: &str) -> bool { - index == "name" || index == "value" - } - - fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> { - if self.contains_key(index) { - Some(DUMMY_VALUE.as_view()) - } else { - None - } - } -} - -impl ValueView for DummyField { - fn as_debug(&self) -> &dyn std::fmt::Debug { - self - } - - fn render(&self) -> DisplayCow<'_> { - DUMMY_VALUE.render() - } - - fn source(&self) -> DisplayCow<'_> { - DUMMY_VALUE.source() - } - - fn type_name(&self) -> &'static str { - "object" - } - - fn query_state(&self, state: State) -> bool { - match state { - State::Truthy => true, - State::DefaultValue => false, - State::Empty => false, - State::Blank => false, - } - } - - fn to_kstr(&self) -> KStringCow<'_> { - DUMMY_VALUE.to_kstr() - } - - fn to_value(&self) -> LiquidValue { - let mut this = Object::new(); - this.insert("name".into(), LiquidValue::Nil); - 
this.insert("value".into(), LiquidValue::Nil); - LiquidValue::Object(this) - } - - fn as_object(&self) -> Option<&dyn ObjectView> { - Some(self) - } -} - -impl ValueView for DummyFields { - fn as_debug(&self) -> &dyn std::fmt::Debug { - self - } - - fn render(&self) -> DisplayCow<'_> { - DUMMY_VALUE.render() - } - - fn source(&self) -> DisplayCow<'_> { - DUMMY_VALUE.source() - } - - fn type_name(&self) -> &'static str { - "array" - } - - fn query_state(&self, state: State) -> bool { - match state { - State::Truthy => true, - State::DefaultValue => false, - State::Empty => false, - State::Blank => false, - } - } - - fn to_kstr(&self) -> KStringCow<'_> { - DUMMY_VALUE.to_kstr() - } - - fn to_value(&self) -> LiquidValue { - LiquidValue::Array(vec![DummyField.to_value()]) - } - - fn as_array(&self) -> Option<&dyn ArrayView> { - Some(self) - } -} - -impl ArrayView for DummyFields { - fn as_value(&self) -> &dyn ValueView { - self - } - - fn size(&self) -> i64 { - u16::MAX as i64 - } - - fn values<'k>(&'k self) -> Box + 'k> { - Box::new(std::iter::once(DummyField.as_value())) - } - - fn contains_key(&self, index: i64) -> bool { - index < self.size() - } - - fn get(&self, _index: i64) -> Option<&dyn ValueView> { - Some(DummyField.as_value()) - } -} - -impl ObjectView for DummyDoc { - fn as_value(&self) -> &dyn ValueView { - self - } - - fn size(&self) -> i64 { - 1000 - } - - fn keys<'k>(&'k self) -> Box> + 'k> { - Box::new(std::iter::empty()) - } - - fn values<'k>(&'k self) -> Box + 'k> { - Box::new(std::iter::empty()) - } - - fn iter<'k>(&'k self) -> Box, &'k dyn ValueView)> + 'k> { - Box::new(std::iter::empty()) - } - - fn contains_key(&self, _index: &str) -> bool { - true - } - - fn get<'s>(&'s self, _index: &str) -> Option<&'s dyn ValueView> { - // Recursively sends itself - Some(self) - } -} - -impl ValueView for DummyDoc { - fn as_debug(&self) -> &dyn std::fmt::Debug { - self - } - - fn render(&self) -> DisplayCow<'_> { - DUMMY_VALUE.render() - } - - fn source(&self) -> DisplayCow<'_> { - DUMMY_VALUE.source() - } - - fn type_name(&self) -> &'static str { - "object" - } - - fn query_state(&self, state: State) -> bool { - match state { - State::Truthy => true, - State::DefaultValue => false, - State::Empty => false, - State::Blank => false, - } - } - - fn to_kstr(&self) -> KStringCow<'_> { - DUMMY_VALUE.to_kstr() - } - - fn to_value(&self) -> LiquidValue { - LiquidValue::Nil - } - - fn as_object(&self) -> Option<&dyn ObjectView> { - Some(self) - } -} - -impl ObjectView for TemplateChecker { - fn as_value(&self) -> &dyn ValueView { - self - } - - fn size(&self) -> i64 { - 2 - } - - fn keys<'k>(&'k self) -> Box> + 'k> { - Box::new(["doc", "fields"].iter().map(|s| KStringCow::from_static(s))) - } - - fn values<'k>(&'k self) -> Box + 'k> { - Box::new( - std::iter::once(DummyDoc.as_value()).chain(std::iter::once(DummyFields.as_value())), - ) - } - - fn iter<'k>(&'k self) -> Box, &'k dyn ValueView)> + 'k> { - Box::new(self.keys().zip(self.values())) - } - - fn contains_key(&self, index: &str) -> bool { - index == "doc" || index == "fields" - } - - fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> { - match index { - "doc" => Some(DummyDoc.as_value()), - "fields" => Some(DummyFields.as_value()), - _ => None, - } - } -} - -impl ValueView for TemplateChecker { - fn as_debug(&self) -> &dyn std::fmt::Debug { - self - } - - fn render(&self) -> liquid::model::DisplayCow<'_> { - DisplayCow::Owned(Box::new(ObjectRender::new(self))) - } - - fn source(&self) -> liquid::model::DisplayCow<'_> { - 
DisplayCow::Owned(Box::new(ObjectSource::new(self))) - } - - fn type_name(&self) -> &'static str { - "object" - } - - fn query_state(&self, state: liquid::model::State) -> bool { - match state { - State::Truthy => true, - State::DefaultValue | State::Empty | State::Blank => false, - } - } - - fn to_kstr(&self) -> liquid::model::KStringCow<'_> { - let s = ObjectRender::new(self).to_string(); - KStringCow::from_string(s) - } - - fn to_value(&self) -> LiquidValue { - LiquidValue::Object( - self.iter().map(|(k, x)| (k.to_string().into(), x.to_value())).collect(), - ) - } - - fn as_object(&self) -> Option<&dyn ObjectView> { - Some(self) - } -} diff --git a/crates/milli/src/search/hybrid.rs b/crates/milli/src/search/hybrid.rs index b63f6288f..c906e1eb7 100644 --- a/crates/milli/src/search/hybrid.rs +++ b/crates/milli/src/search/hybrid.rs @@ -7,6 +7,7 @@ use roaring::RoaringBitmap; use crate::score_details::{ScoreDetails, ScoreValue, ScoringStrategy}; use crate::search::new::{distinct_fid, distinct_single_docid}; use crate::search::SemanticSearch; +use crate::vector::SearchQuery; use crate::{Index, MatchingWords, Result, Search, SearchResult}; struct ScoreWithRatioResult { @@ -225,12 +226,9 @@ impl Search<'_> { return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); } - // no vector search against placeholder search - let Some(query) = search.query.take() else { - return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); - }; // no embedder, no semantic search - let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else { + let Some(SemanticSearch { vector, embedder_name, embedder, quantized, media }) = semantic + else { return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; @@ -241,9 +239,17 @@ impl Search<'_> { let span = tracing::trace_span!(target: "search::hybrid", "embed_one"); let _entered = span.enter(); + let q = search.query.as_deref(); + let media = media.as_ref(); + + let query = match (q, media) { + (Some(text), None) => SearchQuery::Text(text), + (q, media) => SearchQuery::Media { q, media }, + }; + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3); - match embedder.embed_search(&query, Some(deadline)) { + match embedder.embed_search(query, Some(deadline)) { Ok(embedding) => embedding, Err(error) => { tracing::error!(error=%error, "Embedding failed"); @@ -257,8 +263,13 @@ impl Search<'_> { } }; - search.semantic = - Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized }); + search.semantic = Some(SemanticSearch { + vector: Some(vector_query), + embedder_name, + embedder, + quantized, + media, + }); // TODO: would be better to have two distinct functions at this point let vector_results = search.execute()?; diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index 37b1aaf09..97d542524 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -10,8 +10,9 @@ pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FAC pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats}; use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; +use crate::index::MatchingStrategy; use crate::score_details::{ScoreDetails, ScoringStrategy}; -use crate::vector::Embedder; +use crate::vector::{Embedder, Embedding}; use crate::{ execute_search, 
filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index, Result, SearchContext, TimeBudget, UserError, @@ -31,6 +32,7 @@ pub mod similar; #[derive(Debug, Clone)] pub struct SemanticSearch { vector: Option>, + media: Option, embedder_name: String, embedder: Arc, quantized: bool, @@ -92,9 +94,10 @@ impl<'a> Search<'a> { embedder_name: String, embedder: Arc, quantized: bool, - vector: Option>, + vector: Option, + media: Option, ) -> &mut Search<'a> { - self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector }); + self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector, media }); self } @@ -230,24 +233,28 @@ impl<'a> Search<'a> { degraded, used_negative_operator, } = match self.semantic.as_ref() { - Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => { - execute_vector_search( - &mut ctx, - vector, - self.scoring_strategy, - universe, - &self.sort_criteria, - &self.distinct, - self.geo_param, - self.offset, - self.limit, - embedder_name, - embedder, - *quantized, - self.time_budget.clone(), - self.ranking_score_threshold, - )? - } + Some(SemanticSearch { + vector: Some(vector), + embedder_name, + embedder, + quantized, + media: _, + }) => execute_vector_search( + &mut ctx, + vector, + self.scoring_strategy, + universe, + &self.sort_criteria, + &self.distinct, + self.geo_param, + self.offset, + self.limit, + embedder_name, + embedder, + *quantized, + self.time_budget.clone(), + self.ranking_score_threshold, + )?, _ => execute_search( &mut ctx, self.query.as_deref(), @@ -364,6 +371,16 @@ impl Default for TermsMatchingStrategy { } } +impl From for TermsMatchingStrategy { + fn from(other: MatchingStrategy) -> Self { + match other { + MatchingStrategy::Last => Self::Last, + MatchingStrategy::All => Self::All, + MatchingStrategy::Frequency => Self::Frequency, + } + } +} + fn get_first(s: &str) -> &str { match s.chars().next() { Some(c) => &s[..c.len_utf8()], diff --git a/crates/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs index 663599553..3e7fe3458 100644 --- a/crates/milli/src/search/new/geo_sort.rs +++ b/crates/milli/src/search/new/geo_sort.rs @@ -1,8 +1,9 @@ +use std::collections::VecDeque; + use heed::types::{Bytes, Unit}; use heed::{RoPrefix, RoTxn}; use roaring::RoaringBitmap; use rstar::RTree; -use std::collections::VecDeque; use super::facet_string_values; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; diff --git a/crates/milli/src/search/new/query_term/compute_derivations.rs b/crates/milli/src/search/new/query_term/compute_derivations.rs index 5edf85e97..dcb68f2ea 100644 --- a/crates/milli/src/search/new/query_term/compute_derivations.rs +++ b/crates/milli/src/search/new/query_term/compute_derivations.rs @@ -1,7 +1,6 @@ use std::borrow::Cow; use std::cmp::Ordering; use std::collections::BTreeSet; -use std::ops::ControlFlow; use fst::automaton::Str; use fst::{IntoStreamer, Streamer}; @@ -16,12 +15,6 @@ use crate::search::new::{limits, SearchContext}; use crate::search::{build_dfa, get_first}; use crate::{Result, MAX_WORD_LENGTH}; -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum NumberOfTypos { - One, - Two, -} - impl Interned { pub fn compute_fully_if_needed(self, ctx: &mut SearchContext<'_>) -> Result<()> { let s = ctx.term_interner.get_mut(self); @@ -45,7 +38,7 @@ impl Interned { fn find_zero_typo_prefix_derivations( ctx: &mut SearchContext<'_>, word_interned: Interned, - mut visit: impl FnMut(Interned) -> 
Result>, + prefix_of: &mut BTreeSet>, ) -> Result<()> { let word = ctx.word_interner.get(word_interned).to_owned(); let word = word.as_str(); @@ -65,8 +58,8 @@ fn find_zero_typo_prefix_derivations( let derived_word = derived_word.to_string(); let derived_word_interned = ctx.word_interner.insert(derived_word); if derived_word_interned != word_interned { - let cf = visit(derived_word_interned)?; - if cf.is_break() { + prefix_of.insert(derived_word_interned); + if prefix_of.len() >= limits::MAX_PREFIX_COUNT { break; } } @@ -81,7 +74,7 @@ fn find_one_typo_derivations( ctx: &mut SearchContext<'_>, word_interned: Interned, is_prefix: bool, - mut visit: impl FnMut(Interned) -> Result>, + one_typo_words: &mut BTreeSet>, ) -> Result<()> { let fst = ctx.get_words_fst()?; let word = ctx.word_interner.get(word_interned).to_owned(); @@ -98,8 +91,8 @@ fn find_one_typo_derivations( 1 => { let derived_word = std::str::from_utf8(derived_word)?; let derived_word = ctx.word_interner.insert(derived_word.to_owned()); - let cf = visit(derived_word)?; - if cf.is_break() { + one_typo_words.insert(derived_word); + if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT { break; } } @@ -116,7 +109,8 @@ fn find_one_two_typo_derivations( is_prefix: bool, fst: fst::Set>, word_interner: &mut DedupInterner, - mut visit: impl FnMut(Interned, NumberOfTypos) -> Result>, + one_typo_words: &mut BTreeSet>, + two_typo_words: &mut BTreeSet>, ) -> Result<()> { let word = word_interner.get(word_interned).to_owned(); let word = word.as_str(); @@ -130,15 +124,20 @@ fn find_one_two_typo_derivations( let mut stream = fst.search_with_state(automaton).into_stream(); while let Some((derived_word, state)) = stream.next() { + let finished_one_typo_words = one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT; + let finished_two_typo_words = two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT; + if finished_one_typo_words && finished_two_typo_words { + // No chance we will add either one- or two-typo derivations anymore, stop iterating. 
+ break; + } let derived_word = std::str::from_utf8(derived_word)?; - let derived_word_interned = word_interner.insert(derived_word.to_owned()); + // No need to intern here // in the case the typo is on the first letter, we know the number of typo // is two - if get_first(derived_word) != get_first(word) { - let cf = visit(derived_word_interned, NumberOfTypos::Two)?; - if cf.is_break() { - break; - } + if get_first(derived_word) != get_first(word) && !finished_two_typo_words { + let derived_word_interned = word_interner.insert(derived_word.to_owned()); + two_typo_words.insert(derived_word_interned); + continue; } else { // Else, we know that it is the second dfa that matched and compute the // correct distance @@ -146,16 +145,18 @@ fn find_one_two_typo_derivations( match d.to_u8() { 0 => (), 1 => { - let cf = visit(derived_word_interned, NumberOfTypos::One)?; - if cf.is_break() { - break; + if finished_one_typo_words { + continue; } + let derived_word_interned = word_interner.insert(derived_word.to_owned()); + one_typo_words.insert(derived_word_interned); } 2 => { - let cf = visit(derived_word_interned, NumberOfTypos::Two)?; - if cf.is_break() { - break; + if finished_two_typo_words { + continue; } + let derived_word_interned = word_interner.insert(derived_word.to_owned()); + two_typo_words.insert(derived_word_interned); } _ => unreachable!("2 typos DFA produced a distance greater than 2"), } @@ -211,14 +212,7 @@ pub fn partially_initialized_term_from_word( } if is_prefix && use_prefix_db.is_none() { - find_zero_typo_prefix_derivations(ctx, word_interned, |derived_word| { - if prefix_of.len() < limits::MAX_PREFIX_COUNT { - prefix_of.insert(derived_word); - Ok(ControlFlow::Continue(())) - } else { - Ok(ControlFlow::Break(())) - } - })?; + find_zero_typo_prefix_derivations(ctx, word_interned, &mut prefix_of)?; } let synonyms = ctx.index.synonyms(ctx.txn)?; let mut synonym_word_count = 0; @@ -281,14 +275,7 @@ impl Interned { let mut one_typo_words = BTreeSet::new(); if *max_nbr_typos > 0 { - find_one_typo_derivations(ctx, original, is_prefix, |derived_word| { - if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT { - one_typo_words.insert(derived_word); - Ok(ControlFlow::Continue(())) - } else { - Ok(ControlFlow::Break(())) - } - })?; + find_one_typo_derivations(ctx, original, is_prefix, &mut one_typo_words)?; } let split_words = if allows_split_words { @@ -343,27 +330,8 @@ impl Interned { *is_prefix, ctx.index.words_fst(ctx.txn)?, &mut ctx.word_interner, - |derived_word, nbr_typos| { - if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT - && two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT - { - // No chance we will add either one- or two-typo derivations anymore, stop iterating. 
- return Ok(ControlFlow::Break(())); - } - match nbr_typos { - NumberOfTypos::One => { - if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT { - one_typo_words.insert(derived_word); - } - } - NumberOfTypos::Two => { - if two_typo_words.len() < limits::MAX_TWO_TYPOS_COUNT { - two_typo_words.insert(derived_word); - } - } - } - Ok(ControlFlow::Continue(())) - }, + &mut one_typo_words, + &mut two_typo_words, )?; } diff --git a/crates/milli/src/search/new/query_term/parse_query.rs b/crates/milli/src/search/new/query_term/parse_query.rs index e492363f8..64bbb94c0 100644 --- a/crates/milli/src/search/new/query_term/parse_query.rs +++ b/crates/milli/src/search/new/query_term/parse_query.rs @@ -202,11 +202,11 @@ pub fn number_of_typos_allowed<'ctx>( Ok(Box::new(move |word: &str| { if !authorize_typos - || word.len() < min_len_one_typo as usize + || word.chars().count() < min_len_one_typo as usize || exact_words.as_ref().is_some_and(|fst| fst.contains(word)) { 0 - } else if word.len() < min_len_two_typos as usize { + } else if word.chars().count() < min_len_two_typos as usize { 1 } else { 2 @@ -380,4 +380,62 @@ mod tests { Ok(()) } + + #[test] + fn test_unicode_typo_tolerance_fixed() -> Result<()> { + let temp_index = temp_index_with_documents(); + let rtxn = temp_index.read_txn()?; + let ctx = SearchContext::new(&temp_index, &rtxn)?; + + let nbr_typos = number_of_typos_allowed(&ctx)?; + + // ASCII word "doggy" (5 chars, 5 bytes) + let ascii_word = "doggy"; + let ascii_typos = nbr_typos(ascii_word); + + // Cyrillic word "собак" (5 chars, 10 bytes) + let cyrillic_word = "собак"; + let cyrillic_typos = nbr_typos(cyrillic_word); + + // Both words have 5 characters, so they should have the same typo tolerance + assert_eq!( + ascii_typos, cyrillic_typos, + "Words with same character count should get same typo tolerance" + ); + + // With default settings (oneTypo=5, twoTypos=9), 5-char words should get 1 typo + assert_eq!(ascii_typos, 1, "5-character word should get 1 typo tolerance"); + assert_eq!(cyrillic_typos, 1, "5-character word should get 1 typo tolerance"); + + Ok(()) + } + + #[test] + fn test_various_unicode_scripts() -> Result<()> { + let temp_index = temp_index_with_documents(); + let rtxn = temp_index.read_txn()?; + let ctx = SearchContext::new(&temp_index, &rtxn)?; + + let nbr_typos = number_of_typos_allowed(&ctx)?; + + // Let's use 5-character words for consistent testing + let five_char_words = vec![ + ("doggy", "ASCII"), // 5 chars, 5 bytes + ("café!", "Accented"), // 5 chars, 7 bytes + ("собак", "Cyrillic"), // 5 chars, 10 bytes + ]; + + let expected_typos = 1; // With default settings, 5-char words get 1 typo + + for (word, script) in five_char_words { + let typos = nbr_typos(word); + assert_eq!( + typos, expected_typos, + "{} word '{}' should get {} typo(s)", + script, word, expected_typos + ); + } + + Ok(()) + } } diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index 4a6cc9b90..38f39e18b 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -8,7 +8,7 @@ use maplit::{btreemap, hashset}; use crate::progress::Progress; use crate::update::new::indexer; use crate::update::{IndexerConfig, Settings}; -use crate::vector::EmbeddingConfigs; +use crate::vector::RuntimeEmbedders; use crate::{db_snap, Criterion, FilterableAttributesRule, Index}; pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson"); use 
crate::constants::RESERVED_GEO_FIELD_NAME; @@ -44,7 +44,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("america") => vec![S("the united states")], }); builder.set_searchable_fields(vec![S("title"), S("description")]); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); // index documents @@ -55,7 +55,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); let mut file = tempfile::tempfile().unwrap(); @@ -95,6 +95,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); diff --git a/crates/milli/src/search/new/vector_sort.rs b/crates/milli/src/search/new/vector_sort.rs index 834f97384..2c201e899 100644 --- a/crates/milli/src/search/new/vector_sort.rs +++ b/crates/milli/src/search/new/vector_sort.rs @@ -32,8 +32,8 @@ impl VectorSort { ) -> Result { let embedder_index = ctx .index - .embedder_category_id - .get(ctx.txn, embedder_name)? + .embedding_configs() + .embedder_id(ctx.txn, embedder_name)? .ok_or_else(|| crate::UserError::InvalidSearchEmbedder(embedder_name.to_owned()))?; Ok(Self { diff --git a/crates/milli/src/search/similar.rs b/crates/milli/src/search/similar.rs index 759940f9c..903b5fcf9 100644 --- a/crates/milli/src/search/similar.rs +++ b/crates/milli/src/search/similar.rs @@ -64,10 +64,13 @@ impl<'a> Similar<'a> { let universe = universe; - let embedder_index = - self.index.embedder_category_id.get(self.rtxn, &self.embedder_name)?.ok_or_else( - || crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned()), - )?; + let embedder_index = self + .index + .embedding_configs() + .embedder_id(self.rtxn, &self.embedder_name)? 
+ .ok_or_else(|| { + crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned()) + })?; let reader = ArroyWrapper::new(self.index.vector_arroy, embedder_index, self.quantized); let results = reader.nns_by_item( diff --git a/crates/milli/src/test_index.rs b/crates/milli/src/test_index.rs index dfd570b96..6bb6b1345 100644 --- a/crates/milli/src/test_index.rs +++ b/crates/milli/src/test_index.rs @@ -18,7 +18,7 @@ use crate::update::{ self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, Settings, }; use crate::vector::settings::{EmbedderSource, EmbeddingSettings}; -use crate::vector::EmbeddingConfigs; +use crate::vector::RuntimeEmbedders; use crate::{db_snap, obkv_to_json, Filter, FilterableAttributesRule, Index, Search, SearchResult}; pub(crate) struct TempIndex { @@ -66,7 +66,7 @@ impl TempIndex { let db_fields_ids_map = self.inner.fields_ids_map(&rtxn)?; let mut new_fields_ids_map = db_fields_ids_map.clone(); - let embedders = InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.embedding_configs; + let embedders = InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.runtime_embedders; let mut indexer = indexer::DocumentOperation::new(); match self.index_documents_config.update_method { IndexDocumentsMethod::ReplaceDocuments => { @@ -103,6 +103,7 @@ impl TempIndex { embedders, &|| false, &Progress::default(), + &Default::default(), ) }) .unwrap()?; @@ -134,7 +135,7 @@ impl TempIndex { ) -> Result<(), crate::error::Error> { let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config); update(&mut builder); - builder.execute(drop, || false)?; + builder.execute(&|| false, &Progress::default(), Default::default())?; Ok(()) } @@ -150,7 +151,7 @@ impl TempIndex { let db_fields_ids_map = self.inner.fields_ids_map(&rtxn)?; let mut new_fields_ids_map = db_fields_ids_map.clone(); - let embedders = InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.embedding_configs; + let embedders = InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.runtime_embedders; let mut indexer = indexer::DocumentOperation::new(); let external_document_ids: Vec<_> = @@ -185,6 +186,7 @@ impl TempIndex { embedders, &|| false, &Progress::default(), + &Default::default(), ) }) .unwrap()?; @@ -221,7 +223,7 @@ fn aborting_indexation() { let db_fields_ids_map = index.inner.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); let payload = documents!([ { "id": 1, "name": "kevin" }, @@ -259,6 +261,7 @@ fn aborting_indexation() { embedders, &|| should_abort.load(Relaxed), &Progress::default(), + &Default::default(), ) }) .unwrap() diff --git a/crates/milli/src/thread_pool_no_abort.rs b/crates/milli/src/thread_pool_no_abort.rs index 0c2fbb30d..66380ff36 100644 --- a/crates/milli/src/thread_pool_no_abort.rs +++ b/crates/milli/src/thread_pool_no_abort.rs @@ -1,7 +1,7 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; -use rayon::{ThreadPool, ThreadPoolBuilder}; +use rayon::{BroadcastContext, ThreadPool, ThreadPoolBuilder}; use thiserror::Error; /// A rayon ThreadPool wrapper that can catch panics in the pool @@ -32,6 +32,22 @@ impl ThreadPoolNoAbort { } } + pub fn broadcast(&self, op: OP) -> Result, PanicCatched> + where + OP: Fn(BroadcastContext<'_>) -> R + Sync, + R: Send, + { + self.active_operations.fetch_add(1, Ordering::Relaxed); + let output = 
self.thread_pool.broadcast(op);
+        self.active_operations.fetch_sub(1, Ordering::Relaxed);
+        // While resetting the pool panic catcher, we return an error if we caught a panic.
+        if self.pool_catched_panic.swap(false, Ordering::SeqCst) {
+            Err(PanicCatched)
+        } else {
+            Ok(output)
+        }
+    }
+
     pub fn current_num_threads(&self) -> usize {
         self.thread_pool.current_num_threads()
     }
 }
diff --git a/crates/milli/src/update/chat.rs b/crates/milli/src/update/chat.rs
new file mode 100644
index 000000000..2f364894d
--- /dev/null
+++ b/crates/milli/src/update/chat.rs
@@ -0,0 +1,182 @@
+use std::error::Error;
+use std::fmt;
+
+use deserr::errors::JsonError;
+use deserr::Deserr;
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+use crate::index::{self, ChatConfig, MatchingStrategy, RankingScoreThreshold, SearchParameters};
+use crate::prompt::{default_max_bytes, PromptData};
+use crate::update::Setting;
+
+#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Deserr, ToSchema)]
+#[serde(deny_unknown_fields, rename_all = "camelCase")]
+#[deserr(error = JsonError, deny_unknown_fields, rename_all = camelCase)]
+pub struct ChatSettings {
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    #[schema(value_type = Option<String>)]
+    pub description: Setting<String>,
+
+    /// A liquid template used to render documents to a text that can be embedded.
+    ///
+    /// Meilisearch interpolates the template for each document and sends the resulting text to the embedder.
+    /// The embedder then generates document vectors based on this text.
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    #[schema(value_type = Option<String>)]
+    pub document_template: Setting<String>,
+
+    /// Rendered texts are truncated to this size. Defaults to 400.
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    #[schema(value_type = Option<usize>)]
+    pub document_template_max_bytes: Setting<usize>,
+
+    /// The search parameters to use for the LLM.
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    #[schema(value_type = Option<ChatSearchParams>)]
+    pub search_parameters: Setting<ChatSearchParams>,
+}
+
+impl From<ChatConfig> for ChatSettings {
+    fn from(config: ChatConfig) -> Self {
+        let ChatConfig {
+            description,
+            prompt: PromptData { template, max_bytes },
+            search_parameters,
+        } = config;
+        ChatSettings {
+            description: Setting::Set(description),
+            document_template: Setting::Set(template),
+            document_template_max_bytes: Setting::Set(
+                max_bytes.unwrap_or(default_max_bytes()).get(),
+            ),
+            search_parameters: Setting::Set({
+                let SearchParameters {
+                    hybrid,
+                    limit,
+                    sort,
+                    distinct,
+                    matching_strategy,
+                    attributes_to_search_on,
+                    ranking_score_threshold,
+                } = search_parameters;
+
+                let hybrid = hybrid.map(|index::HybridQuery { semantic_ratio, embedder }| {
+                    HybridQuery { semantic_ratio: SemanticRatio(semantic_ratio), embedder }
+                });
+
+                ChatSearchParams {
+                    hybrid: Setting::some_or_not_set(hybrid),
+                    limit: Setting::some_or_not_set(limit),
+                    sort: Setting::some_or_not_set(sort),
+                    distinct: Setting::some_or_not_set(distinct),
+                    matching_strategy: Setting::some_or_not_set(matching_strategy),
+                    attributes_to_search_on: Setting::some_or_not_set(attributes_to_search_on),
+                    ranking_score_threshold: Setting::some_or_not_set(ranking_score_threshold),
+                }
+            }),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Deserr, ToSchema)]
+#[serde(deny_unknown_fields, rename_all = "camelCase")]
+#[deserr(error = JsonError, deny_unknown_fields, rename_all = camelCase)]
+pub struct ChatSearchParams {
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    #[schema(value_type = Option<HybridQuery>)]
+    pub hybrid: Setting<HybridQuery>,
+
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default = Setting::Set(20))]
+    #[schema(value_type = Option<usize>)]
+    pub limit: Setting<usize>,
+
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    #[schema(value_type = Option<Vec<String>>)]
+    pub sort: Setting<Vec<String>>,
+
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    #[schema(value_type = Option<String>)]
+    pub distinct: Setting<String>,
+
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    #[schema(value_type = Option<MatchingStrategy>)]
+    pub matching_strategy: Setting<MatchingStrategy>,
+
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    #[schema(value_type = Option<Vec<String>>)]
+    pub attributes_to_search_on: Setting<Vec<String>>,
+
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    #[schema(value_type = Option<RankingScoreThreshold>)]
+    pub ranking_score_threshold: Setting<RankingScoreThreshold>,
+}
+
+#[derive(Debug, Clone, Default, Deserr, ToSchema, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+#[deserr(error = JsonError, rename_all = camelCase, deny_unknown_fields)]
+pub struct HybridQuery {
+    #[deserr(default)]
+    #[serde(default)]
+    #[schema(default, value_type = f32)]
+    pub semantic_ratio: SemanticRatio,
+    #[schema(value_type = String)]
+    pub embedder: String,
+}
+
+#[derive(Debug, Clone, Copy, Deserr, ToSchema, PartialEq, Serialize, Deserialize)]
+#[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
+pub struct SemanticRatio(f32);
+
+impl Default for SemanticRatio {
+    fn default() -> Self {
+        SemanticRatio(0.5)
+    }
+}
+
+impl std::convert::TryFrom<f32> for SemanticRatio {
+    type Error = InvalidSearchSemanticRatio;
+
+    fn try_from(f: f32) -> Result<Self, Self::Error> {
+        // the suggested "fix" is `!(0.0..=1.0).contains(&f)`, which
is allegedly less readable + #[allow(clippy::manual_range_contains)] + if f > 1.0 || f < 0.0 { + Err(InvalidSearchSemanticRatio) + } else { + Ok(SemanticRatio(f)) + } + } +} + +#[derive(Debug)] +pub struct InvalidSearchSemanticRatio; + +impl Error for InvalidSearchSemanticRatio {} + +impl fmt::Display for InvalidSearchSemanticRatio { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`." + ) + } +} + +impl std::ops::Deref for SemanticRatio { + type Target = f32; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/crates/milli/src/update/clear_documents.rs b/crates/milli/src/update/clear_documents.rs index b0ae070de..01631e9a3 100644 --- a/crates/milli/src/update/clear_documents.rs +++ b/crates/milli/src/update/clear_documents.rs @@ -64,11 +64,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> { self.index.delete_geo_faceted_documents_ids(self.wtxn)?; // Remove all user-provided bits from the configs - let mut configs = self.index.embedding_configs(self.wtxn)?; - for config in configs.iter_mut() { - config.user_provided.clear(); - } - self.index.put_embedding_configs(self.wtxn, configs)?; + self.index.embedding_configs().clear_embedder_info_docids(self.wtxn)?; // Clear the other databases. external_documents_ids.clear(self.wtxn)?; diff --git a/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index d502e69cc..b906c7778 100644 --- a/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -29,7 +29,6 @@ pub fn extract_docid_word_positions( let max_positions_per_attributes = max_positions_per_attributes .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE)); let max_memory = indexer.max_memory_by_thread(); - let force_reindexing = settings_diff.reindex_searchable(); // initialize destination values. let mut documents_ids = RoaringBitmap::new(); @@ -43,6 +42,12 @@ pub fn extract_docid_word_positions( true, ); + let force_reindexing = settings_diff.reindex_searchable(); + let skip_indexing = !force_reindexing && settings_diff.settings_update_only(); + if skip_indexing { + return sorter_into_reader(docid_word_positions_sorter, indexer); + } + // initialize buffers. 
let mut del_buffers = Buffers::default(); let mut add_buffers = Buffers::default(); diff --git a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs index cb8c121ce..064cfd154 100644 --- a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -1,4 +1,5 @@ use std::cmp::Ordering; +use std::collections::{BTreeMap, VecDeque}; use std::convert::{TryFrom, TryInto}; use std::fs::File; use std::io::{self, BufReader, BufWriter}; @@ -6,24 +7,29 @@ use std::mem::size_of; use std::str::from_utf8; use std::sync::Arc; +use bumpalo::Bump; use bytemuck::cast_slice; +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use grenad::Writer; +use obkv::KvReaderU16; use ordered_float::OrderedFloat; -use roaring::RoaringBitmap; use serde_json::Value; use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; use crate::constants::RESERVED_VECTORS_FIELD_NAME; use crate::error::FaultSource; use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; -use crate::index::IndexEmbeddingConfig; +use crate::progress::EmbedderStats; use crate::prompt::Prompt; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; +use crate::vector::db::{EmbedderInfo, EmbeddingStatus, EmbeddingStatusDelta}; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; +use crate::vector::extractor::{Extractor, ExtractorDiff, RequestFragmentExtractor}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState}; +use crate::vector::session::{EmbedSession, Metadata, OnEmbed}; use crate::vector::settings::ReindexAction; -use crate::vector::{Embedder, Embedding}; +use crate::vector::{Embedder, Embedding, RuntimeEmbedder, RuntimeFragment}; use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort}; /// The length of the elements that are always in the buffer when inserting new values. @@ -36,12 +42,13 @@ pub struct ExtractedVectorPoints { pub remove_vectors: grenad::Reader>, // docid -> prompt pub prompts: grenad::Reader>, + // docid, extractor_id -> Option + pub inputs: grenad::Reader>, // embedder pub embedder_name: String, - pub embedder: Arc, - pub add_to_user_provided: RoaringBitmap, - pub remove_from_user_provided: RoaringBitmap, + pub runtime: Arc, + pub embedding_status_delta: EmbeddingStatusDelta, } enum VectorStateDelta { @@ -55,46 +62,74 @@ enum VectorStateDelta { // Remove any previous vector // Note: changing the value of the prompt **does require** recording this delta NowGenerated(String), + + // Add and remove the vectors computed from the fragments. + UpdateGeneratedFromFragments(Vec<(String, ExtractorDiff)>), + + /// Wasn't generated from fragments, but now is. 
+ /// Delete any previous vectors and add the new vectors + NowGeneratedFromFragments(Vec<(String, Value)>), } impl VectorStateDelta { - fn into_values(self) -> (bool, String, Vec>) { + fn into_values(self) -> (bool, String, BTreeMap>, Vec>) { match self { VectorStateDelta::NoChange => Default::default(), - VectorStateDelta::NowRemoved => (true, Default::default(), Default::default()), - // We always delete the previous vectors - VectorStateDelta::NowManual(add) => (true, Default::default(), add), - VectorStateDelta::NowGenerated(prompt) => (true, prompt, Default::default()), + VectorStateDelta::NowRemoved => { + (true, Default::default(), Default::default(), Default::default()) + } + VectorStateDelta::NowManual(add) => (true, Default::default(), Default::default(), add), + VectorStateDelta::NowGenerated(prompt) => { + (true, prompt, Default::default(), Default::default()) + } + VectorStateDelta::UpdateGeneratedFromFragments(fragments) => ( + false, + Default::default(), + ExtractorDiff::into_list_of_changes(fragments), + Default::default(), + ), + VectorStateDelta::NowGeneratedFromFragments(items) => ( + true, + Default::default(), + ExtractorDiff::into_list_of_changes( + items.into_iter().map(|(name, value)| (name, ExtractorDiff::Added(value))), + ), + Default::default(), + ), } } } -struct EmbedderVectorExtractor { +struct EmbedderVectorExtractor<'a> { embedder_name: String, - embedder: Arc, - prompt: Arc, + embedder_info: &'a EmbedderInfo, + runtime: Arc, // (docid) -> (prompt) prompts_writer: Writer>, + // (docid, extractor_id) -> (Option) + inputs_writer: Writer>, // (docid) -> () remove_vectors_writer: Writer>, // (docid, _index) -> KvWriterDelAdd -> Vector manual_vectors_writer: Writer>, - // The docids of the documents that contains a user defined embedding - add_to_user_provided: RoaringBitmap, + embedding_status_delta: EmbeddingStatusDelta, action: ExtractionAction, } -struct DocumentOperation { - // The docids of the documents that contains an auto-generated embedding - remove_from_user_provided: RoaringBitmap, -} - enum ExtractionAction { SettingsFullReindex, - SettingsRegeneratePrompts { old_prompt: Arc }, - DocumentOperation(DocumentOperation), + SettingsRegeneratePrompts { + old_runtime: Arc, + }, + /// List of fragments to update/add + SettingsRegenerateFragments { + // name and indices, respectively in old and new runtime, of the fragments to examine. 
+ must_regenerate_fragments: BTreeMap, usize)>, + old_runtime: Arc, + }, + DocumentOperation, } struct ManualEmbedderErrors { @@ -182,8 +217,8 @@ impl ManualEmbedderErrors { pub fn extract_vector_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, - embedders_configs: &[IndexEmbeddingConfig], settings_diff: &InnerIndexSettingsDiff, + embedder_info: &[(String, EmbedderInfo)], possible_embedding_mistakes: &PossibleEmbeddingMistakes, ) -> Result<(Vec, UnusedVectorsDistribution)> { let mut unused_vectors_distribution = UnusedVectorsDistribution::new(); @@ -201,15 +236,15 @@ pub fn extract_vector_points( let mut extractors = Vec::new(); - let mut configs = settings_diff.new.embedding_configs.clone().into_inner(); - let old_configs = &settings_diff.old.embedding_configs; - + let mut configs = settings_diff.new.runtime_embedders.clone().into_inner(); + let old_configs = &settings_diff.old.runtime_embedders; if reindex_vectors { for (name, action) in settings_diff.embedding_config_updates.iter() { if let Some(action) = action.reindex() { - let Some((embedder_name, (embedder, prompt, _quantized))) = - configs.remove_entry(name) - else { + let (_, embedder_info) = + embedder_info.iter().find(|(embedder_name, _)| embedder_name == name).unwrap(); + + let Some((embedder_name, runtime)) = configs.remove_entry(name) else { tracing::error!(embedder = name, "Requested embedder config not found"); continue; }; @@ -228,6 +263,12 @@ pub fn extract_vector_points( tempfile::tempfile()?, ); + let inputs_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); + // (docid) -> () let remove_vectors_writer = create_writer( indexer.chunk_compression_type, @@ -237,24 +278,68 @@ pub fn extract_vector_points( let action = match action { ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, - ReindexAction::RegeneratePrompts => { - let Some((_, old_prompt, _quantized)) = old_configs.get(name) else { + ReindexAction::RegenerateFragments(regenerate_fragments) => { + let Some(old_runtime) = old_configs.get(name) else { tracing::error!(embedder = name, "Old embedder config not found"); continue; }; - ExtractionAction::SettingsRegeneratePrompts { old_prompt } + let fragment_diffs = regenerate_fragments + .iter() + .filter_map(|(name, fragment)| match fragment { + crate::vector::settings::RegenerateFragment::Update => { + let old_value = old_runtime + .fragments() + .binary_search_by_key(&name, |fragment| &fragment.name) + .ok(); + let Ok(new_value) = runtime + .fragments() + .binary_search_by_key(&name, |fragment| &fragment.name) + else { + return None; + }; + Some((name.clone(), (old_value, new_value))) + } + // was already handled in transform + crate::vector::settings::RegenerateFragment::Remove => None, + crate::vector::settings::RegenerateFragment::Add => { + let Ok(new_value) = runtime + .fragments() + .binary_search_by_key(&name, |fragment| &fragment.name) + else { + return None; + }; + Some((name.clone(), (None, new_value))) + } + }) + .collect(); + ExtractionAction::SettingsRegenerateFragments { + old_runtime: old_runtime.clone(), + must_regenerate_fragments: fragment_diffs, + } + } + + ReindexAction::RegeneratePrompts => { + let Some(old_runtime) = old_configs.get(name) else { + tracing::error!(embedder = name, "Old embedder config not found"); + continue; + }; + + ExtractionAction::SettingsRegeneratePrompts { + old_runtime: old_runtime.clone(), + } } }; extractors.push(EmbedderVectorExtractor { embedder_name, - 
embedder, - prompt, + runtime, + embedder_info, prompts_writer, + inputs_writer, remove_vectors_writer, manual_vectors_writer, - add_to_user_provided: RoaringBitmap::new(), + embedding_status_delta: Default::default(), action, }); } else { @@ -263,8 +348,12 @@ pub fn extract_vector_points( } } else { // document operation + for (embedder_name, runtime) in configs.into_iter() { + let (_, embedder_info) = embedder_info + .iter() + .find(|(name, _)| embedder_name.as_str() == name.as_str()) + .unwrap(); - for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() { // (docid, _index) -> KvWriterDelAdd -> Vector let manual_vectors_writer = create_writer( indexer.chunk_compression_type, @@ -279,6 +368,12 @@ pub fn extract_vector_points( tempfile::tempfile()?, ); + let inputs_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); + // (docid) -> () let remove_vectors_writer = create_writer( indexer.chunk_compression_type, @@ -288,22 +383,23 @@ pub fn extract_vector_points( extractors.push(EmbedderVectorExtractor { embedder_name, - embedder, - prompt, + runtime, + embedder_info, prompts_writer, + inputs_writer, remove_vectors_writer, manual_vectors_writer, - add_to_user_provided: RoaringBitmap::new(), - action: ExtractionAction::DocumentOperation(DocumentOperation { - remove_from_user_provided: RoaringBitmap::new(), - }), + embedding_status_delta: Default::default(), + action: ExtractionAction::DocumentOperation, }); } } let mut key_buffer = Vec::new(); let mut cursor = obkv_documents.into_cursor()?; + let mut doc_alloc = Bump::new(); while let Some((key, value)) = cursor.move_on_next()? { + doc_alloc.reset(); // this must always be serialized as (docid, external_docid); const SIZE_OF_DOCUMENTID: usize = std::mem::size_of::(); let (docid_bytes, external_id_bytes) = @@ -319,9 +415,12 @@ pub fn extract_vector_points( // lazily get it when needed let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() }; + let regenerate_for_embedders = embedder_info + .iter() + .filter(|&(_, infos)| infos.embedding_status.must_regenerate(docid)) + .map(|(name, _)| name.clone()); let mut parsed_vectors = ParsedVectorsDiff::new( - docid, - embedders_configs, + regenerate_for_embedders, obkv, old_vectors_fid, new_vectors_fid, @@ -330,44 +429,40 @@ pub fn extract_vector_points( for EmbedderVectorExtractor { embedder_name, - embedder, - prompt, + runtime, + embedder_info, prompts_writer, + inputs_writer, remove_vectors_writer, manual_vectors_writer, - add_to_user_provided, + embedding_status_delta, action, } in extractors.iter_mut() { - let embedder_is_manual = matches!(**embedder, Embedder::UserProvided(_)); + let embedder_is_manual = matches!(*runtime.embedder, Embedder::UserProvided(_)); let (old, new) = parsed_vectors.remove(embedder_name); + let new_must_regenerate = new.must_regenerate(); let delta = match action { ExtractionAction::SettingsFullReindex => match old { // A full reindex can be triggered either by: // 1. a new embedder // 2. an existing embedder changed so that it must regenerate all generated embeddings. 
// For a new embedder, there can be `_vectors.embedder` embeddings to add to the DB - VectorState::Inline(vectors) => { - if !vectors.must_regenerate() { - add_to_user_provided.insert(docid); - } - - match vectors.into_array_of_vectors() { - Some(add_vectors) => { - if add_vectors.len() > usize::from(u8::MAX) { - return Err(crate::Error::UserError( - crate::UserError::TooManyVectors( - document_id().to_string(), - add_vectors.len(), - ), - )); - } - VectorStateDelta::NowManual(add_vectors) + VectorState::Inline(vectors) => match vectors.into_array_of_vectors() { + Some(add_vectors) => { + if add_vectors.len() > usize::from(u8::MAX) { + return Err(crate::Error::UserError( + crate::UserError::TooManyVectors( + document_id().to_string(), + add_vectors.len(), + ), + )); } - None => VectorStateDelta::NoChange, + VectorStateDelta::NowManual(add_vectors) } - } + None => VectorStateDelta::NoChange, + }, // this happens only when an existing embedder changed. We cannot regenerate userProvided vectors VectorState::Manual => VectorStateDelta::NoChange, // generated vectors must be regenerated @@ -380,11 +475,81 @@ pub fn extract_vector_points( ); continue; } - regenerate_prompt(obkv, prompt, new_fields_ids_map)? + let has_fragments = !runtime.fragments().is_empty(); + + if has_fragments { + regenerate_all_fragments( + runtime.fragments(), + &doc_alloc, + new_fields_ids_map, + obkv, + ) + } else { + regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)? + } } }, + ExtractionAction::SettingsRegenerateFragments { + must_regenerate_fragments, + old_runtime, + } => { + if old.must_regenerate() { + let has_fragments = !runtime.fragments().is_empty(); + let old_has_fragments = !old_runtime.fragments().is_empty(); + + let is_adding_fragments = has_fragments && !old_has_fragments; + + if is_adding_fragments { + regenerate_all_fragments( + runtime.fragments(), + &doc_alloc, + new_fields_ids_map, + obkv, + ) + } else if !has_fragments { + // removing fragments + regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)? 
+ } else { + let mut fragment_diff = Vec::new(); + let new_fields_ids_map = new_fields_ids_map.as_fields_ids_map(); + + let obkv_document = crate::update::new::document::KvDelAddDocument::new( + obkv, + DelAdd::Addition, + new_fields_ids_map, + ); + for (name, (old_index, new_index)) in must_regenerate_fragments { + let Some(new) = runtime.fragments().get(*new_index) else { + continue; + }; + + let new = + RequestFragmentExtractor::new(new, &doc_alloc).ignore_errors(); + + let diff = { + let old = old_index.as_ref().and_then(|old| { + let old = old_runtime.fragments().get(*old)?; + Some( + RequestFragmentExtractor::new(old, &doc_alloc) + .ignore_errors(), + ) + }); + let old = old.as_ref(); + Extractor::diff_settings(&new, &obkv_document, &(), old) + } + .expect("ignoring errors so this cannot fail"); + fragment_diff.push((name.clone(), diff)); + } + VectorStateDelta::UpdateGeneratedFromFragments(fragment_diff) + } + } else { + // we can simply ignore user provided vectors as they are not regenerated and are + // already in the DB since this is an existing embedder + VectorStateDelta::NoChange + } + } // prompt regeneration is only triggered for existing embedders - ExtractionAction::SettingsRegeneratePrompts { old_prompt } => { + ExtractionAction::SettingsRegeneratePrompts { old_runtime } => { if old.must_regenerate() { if embedder_is_manual { ManualEmbedderErrors::push_error( @@ -394,24 +559,32 @@ pub fn extract_vector_points( ); continue; } - regenerate_if_prompt_changed( - obkv, - (old_prompt, prompt), - (old_fields_ids_map, new_fields_ids_map), - )? + let has_fragments = !runtime.fragments().is_empty(); + + if has_fragments { + regenerate_all_fragments( + runtime.fragments(), + &doc_alloc, + new_fields_ids_map, + obkv, + ) + } else { + regenerate_if_prompt_changed( + obkv, + (&old_runtime.document_template, &runtime.document_template), + (old_fields_ids_map, new_fields_ids_map), + )? 
+                    }
                 } else {
                     // we can simply ignore user provided vectors as they are not regenerated and are
                     // already in the DB since this is an existing embedder
                     VectorStateDelta::NoChange
                 }
             }
-                ExtractionAction::DocumentOperation(DocumentOperation {
-                    remove_from_user_provided,
-                }) => extract_vector_document_diff(
-                    docid,
+                ExtractionAction::DocumentOperation => extract_vector_document_diff(
                     obkv,
-                    prompt,
-                    (add_to_user_provided, remove_from_user_provided),
+                    runtime,
+                    &doc_alloc,
                     (old, new),
                     (old_fields_ids_map, new_fields_ids_map),
                     document_id,
@@ -420,13 +593,25 @@
                     &mut manual_errors,
                 )?,
             };
+
+            // update the embedding status
+            push_embedding_status_delta(
+                embedding_status_delta,
+                docid,
+                &delta,
+                new_must_regenerate,
+                &embedder_info.embedding_status,
+            );
+
             // and we finally push the unique vectors into the writer
             push_vectors_diff(
                 remove_vectors_writer,
                 prompts_writer,
+                inputs_writer,
                 manual_vectors_writer,
                 &mut key_buffer,
                 delta,
+                runtime.fragments(),
             )?;
         }
 
@@ -443,45 +628,65 @@
     for EmbedderVectorExtractor {
         embedder_name,
-        embedder,
-        prompt: _,
+        runtime,
+        embedder_info: _,
         prompts_writer,
+        inputs_writer,
         remove_vectors_writer,
-        action,
+        action: _,
         manual_vectors_writer,
-        add_to_user_provided,
+        embedding_status_delta,
     } in extractors
     {
-        let remove_from_user_provided =
-            if let ExtractionAction::DocumentOperation(DocumentOperation {
-                remove_from_user_provided,
-            }) = action
-            {
-                remove_from_user_provided
-            } else {
-                Default::default()
-            };
-
         results.push(ExtractedVectorPoints {
             manual_vectors: writer_into_reader(manual_vectors_writer)?,
             remove_vectors: writer_into_reader(remove_vectors_writer)?,
             prompts: writer_into_reader(prompts_writer)?,
-            embedder,
+            inputs: writer_into_reader(inputs_writer)?,
+            runtime,
             embedder_name,
-            add_to_user_provided,
-            remove_from_user_provided,
+            embedding_status_delta,
        })
    }
 
     Ok((results, unused_vectors_distribution))
 }
 
+fn push_embedding_status_delta(
+    embedding_status_delta: &mut EmbeddingStatusDelta,
+    docid: DocumentId,
+    delta: &VectorStateDelta,
+    new_must_regenerate: bool,
+    embedding_status: &EmbeddingStatus,
+) {
+    let (old_is_user_provided, old_must_regenerate) =
+        embedding_status.is_user_provided_must_regenerate(docid);
+    let new_is_user_provided = match delta {
+        VectorStateDelta::NoChange => old_is_user_provided,
+        VectorStateDelta::NowRemoved => {
+            embedding_status_delta.clear_docid(docid, old_is_user_provided, old_must_regenerate);
+            return;
+        }
+        VectorStateDelta::NowManual(_) => true,
+        VectorStateDelta::NowGenerated(_)
+        | VectorStateDelta::UpdateGeneratedFromFragments(_)
+        | VectorStateDelta::NowGeneratedFromFragments(_) => false,
+    };
+
+    embedding_status_delta.push_delta(
+        docid,
+        old_is_user_provided,
+        old_must_regenerate,
+        new_is_user_provided,
+        new_must_regenerate,
+    );
+}
+
 #[allow(clippy::too_many_arguments)] // feel free to find efficient way to factor arguments
 fn extract_vector_document_diff(
-    docid: DocumentId,
     obkv: &obkv::KvReader<FieldId>,
-    prompt: &Prompt,
-    (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
+    runtime: &RuntimeEmbedder,
+    doc_alloc: &Bump,
     (old, new): (VectorState, VectorState),
    (old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
    document_id: impl Fn() -> Value,
@@ -489,16 +694,6 @@ fn extract_vector_document_diff(
     embedder_is_manual: bool,
     manual_errors: &mut Option<ManualEmbedderErrors>,
 ) -> Result<VectorStateDelta> {
-    match (old.must_regenerate(), new.must_regenerate()) {
-        (true,
true) | (false, false) => {} - (true, false) => { - add_to_user_provided.insert(docid); - } - (false, true) => { - remove_from_user_provided.insert(docid); - } - } - let delta = match (old, new) { // regardless of the previous state, if a document now contains inline _vectors, they must // be extracted manually @@ -529,22 +724,55 @@ fn extract_vector_document_diff( ManualEmbedderErrors::push_error(manual_errors, embedder_name, document_id); return Ok(VectorStateDelta::NoChange); } - // Don't give up if the old prompt was failing - let old_prompt = Some(&prompt).map(|p| { - p.render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map) - .unwrap_or_default() - }); - let new_prompt = - prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?; - if old_prompt.as_ref() != Some(&new_prompt) { - let old_prompt = old_prompt.unwrap_or_default(); - tracing::trace!( - "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" + let has_fragments = !runtime.fragments().is_empty(); + if has_fragments { + let mut fragment_diff = Vec::new(); + let old_fields_ids_map = old_fields_ids_map.as_fields_ids_map(); + let new_fields_ids_map = new_fields_ids_map.as_fields_ids_map(); + + let old_document = crate::update::new::document::KvDelAddDocument::new( + obkv, + DelAdd::Deletion, + old_fields_ids_map, ); - VectorStateDelta::NowGenerated(new_prompt) + + let new_document = crate::update::new::document::KvDelAddDocument::new( + obkv, + DelAdd::Addition, + new_fields_ids_map, + ); + + for new in runtime.fragments() { + let name = &new.name; + let fragment = + RequestFragmentExtractor::new(new, doc_alloc).ignore_errors(); + + let diff = fragment + .diff_documents(&old_document, &new_document, &()) + .expect("ignoring errors so this cannot fail"); + + fragment_diff.push((name.clone(), diff)); + } + VectorStateDelta::UpdateGeneratedFromFragments(fragment_diff) } else { - tracing::trace!("⏭️ Prompt unmodified, skipping"); - VectorStateDelta::NoChange + let prompt = &runtime.document_template; + // Don't give up if the old prompt was failing + let old_prompt = Some(&prompt).map(|p| { + p.render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map) + .unwrap_or_default() + }); + let new_prompt = + prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?; + if old_prompt.as_ref() != Some(&new_prompt) { + let old_prompt = old_prompt.unwrap_or_default(); + tracing::trace!( + "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" + ); + VectorStateDelta::NowGenerated(new_prompt) + } else { + tracing::trace!("⏭️ Prompt unmodified, skipping"); + VectorStateDelta::NoChange + } } } else { VectorStateDelta::NowRemoved @@ -566,15 +794,25 @@ fn extract_vector_document_diff( ManualEmbedderErrors::push_error(manual_errors, embedder_name, document_id); return Ok(VectorStateDelta::NoChange); } - // becomes autogenerated - VectorStateDelta::NowGenerated(prompt.render_kvdeladd( - obkv, - DelAdd::Addition, - new_fields_ids_map, - )?) + + let has_fragments = !runtime.fragments().is_empty(); + + if has_fragments { + regenerate_all_fragments( + runtime.fragments(), + doc_alloc, + new_fields_ids_map, + obkv, + ) + } else { + // becomes autogenerated + VectorStateDelta::NowGenerated(runtime.document_template.render_kvdeladd( + obkv, + DelAdd::Addition, + new_fields_ids_map, + )?) 
+            }
         } else {
-            // make sure the document is always removed from user provided on removal
-            remove_from_user_provided.insert(docid);
             VectorStateDelta::NowRemoved
         }
     }
@@ -592,8 +830,6 @@ fn extract_vector_document_diff(
             // then they are user-provided and nothing possibly changed
             VectorStateDelta::NoChange
         } else {
-            // make sure the document is always removed from user provided on removal
-            remove_from_user_provided.insert(docid);
             VectorStateDelta::NowRemoved
         }
     }
@@ -628,16 +864,44 @@ fn regenerate_prompt(
     Ok(VectorStateDelta::NowGenerated(prompt))
 }
 
+fn regenerate_all_fragments<'a>(
+    fragments: impl IntoIterator<Item = &'a RuntimeFragment>,
+    doc_alloc: &Bump,
+    new_fields_ids_map: &FieldIdMapWithMetadata,
+    obkv: &KvReaderU16,
+) -> VectorStateDelta {
+    let mut fragment_diff = Vec::new();
+    let new_fields_ids_map = new_fields_ids_map.as_fields_ids_map();
+
+    let obkv_document = crate::update::new::document::KvDelAddDocument::new(
+        obkv,
+        DelAdd::Addition,
+        new_fields_ids_map,
+    );
+    for new in fragments {
+        let name = &new.name;
+        let new = RequestFragmentExtractor::new(new, doc_alloc).ignore_errors();
+
+        let diff = new.extract(&obkv_document, &()).expect("ignoring errors so this cannot fail");
+        if let Some(value) = diff {
+            fragment_diff.push((name.clone(), value));
+        }
+    }
+    VectorStateDelta::NowGeneratedFromFragments(fragment_diff)
+}
+
 /// We cannot compute the diff between both Del and Add vectors.
 /// We'll push every vector and compute the difference later in TypedChunk.
 fn push_vectors_diff(
     remove_vectors_writer: &mut Writer<BufWriter<File>>,
     prompts_writer: &mut Writer<BufWriter<File>>,
+    inputs_writer: &mut Writer<BufWriter<File>>,
     manual_vectors_writer: &mut Writer<BufWriter<File>>,
     key_buffer: &mut Vec<u8>,
     delta: VectorStateDelta,
+    fragments: &[RuntimeFragment],
 ) -> Result<()> {
-    let (must_remove, prompt, mut add_vectors) = delta.into_values();
+    let (must_remove, prompt, mut fragment_delta, mut add_vectors) = delta.into_values();
     if must_remove {
         key_buffer.truncate(TRUNCATE_SIZE);
         remove_vectors_writer.insert(&key_buffer, [])?;
@@ -647,23 +911,49 @@ fn push_vectors_diff(
         prompts_writer.insert(&key_buffer, prompt.as_bytes())?;
     }
 
-    // We sort and dedup the vectors
-    add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
-    add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
+    if !fragment_delta.is_empty() {
+        let mut scratch = Vec::new();
+        let mut fragment_delta: Vec<_> = fragments
+            .iter()
+            .filter_map(|fragment| {
+                let delta = fragment_delta.remove(&fragment.name)?;
+                Some((fragment.id, delta))
+            })
+            .collect();
 
-    // insert vectors into the writer
-    for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) {
-        // Generate the key by extending the unique index to it.
-        key_buffer.truncate(TRUNCATE_SIZE);
-        let index = u16::try_from(i).unwrap();
-        key_buffer.extend_from_slice(&index.to_be_bytes());
+        fragment_delta.sort_unstable_by_key(|(id, _)| *id);
+        for (id, value) in fragment_delta {
+            key_buffer.truncate(TRUNCATE_SIZE);
+            key_buffer.push(id);
+            if let Some(value) = value {
+                scratch.clear();
+                serde_json::to_writer(&mut scratch, &value).unwrap();
+                inputs_writer.insert(&key_buffer, &scratch)?;
+            } else {
+                inputs_writer.insert(&key_buffer, [])?;
+            }
+        }
+    }
 
-        // We insert only the Add part of the Obkv to inform
-        // that we only want to remove all those vectors.
-        let mut obkv = KvWriterDelAdd::memory();
-        obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
-        let bytes = obkv.into_inner()?;
-        manual_vectors_writer.insert(&key_buffer, bytes)?;
+    if !add_vectors.is_empty() {
+        // We sort and dedup the vectors
+        add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
+        add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
+
+        // insert vectors into the writer
+        for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) {
+            // Generate the key by extending the unique index to it.
+            key_buffer.truncate(TRUNCATE_SIZE);
+            let index = u16::try_from(i).unwrap();
+            key_buffer.extend_from_slice(&index.to_be_bytes());
+
+            // We insert only the Add part of the Obkv to inform
+            // that we only want to remove all those vectors.
+            let mut obkv = KvWriterDelAdd::memory();
+            obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
+            let bytes = obkv.into_inner()?;
+            manual_vectors_writer.insert(&key_buffer, bytes)?;
+        }
     }
 
     Ok(())
@@ -674,17 +964,20 @@
 fn compare_vectors(a: &[f32], b: &[f32]) -> Ordering {
     a.iter().copied().map(OrderedFloat).cmp(b.iter().copied().map(OrderedFloat))
 }
 
+#[allow(clippy::too_many_arguments)]
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
-pub fn extract_embeddings<R: io::Read + io::Seek>(
+pub fn extract_embeddings_from_prompts<R: io::Read + io::Seek>(
     // docid, prompt
     prompt_reader: grenad::Reader<R>,
     indexer: GrenadParameters,
-    embedder: Arc<Embedder>,
+    runtime: Arc<RuntimeEmbedder>,
     embedder_name: &str,
     possible_embedding_mistakes: &PossibleEmbeddingMistakes,
+    embedder_stats: &EmbedderStats,
     unused_vectors_distribution: &UnusedVectorsDistribution,
     request_threads: &ThreadPoolNoAbort,
 ) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
+    let embedder = &runtime.embedder;
     let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
     let n_vectors_per_chunk = embedder.prompt_count_in_chunk_hint(); // number of vectors in a single chunk
 
@@ -720,10 +1013,11 @@
         if chunks.len() == chunks.capacity() {
             let chunked_embeds = embed_chunks(
-                &embedder,
+                embedder,
                 std::mem::replace(&mut chunks, Vec::with_capacity(n_chunks)),
                 embedder_name,
                 possible_embedding_mistakes,
+                embedder_stats,
                 unused_vectors_distribution,
                 request_threads,
             )?;
@@ -742,10 +1036,11 @@
     // send last chunk
     if !chunks.is_empty() {
         let chunked_embeds = embed_chunks(
-            &embedder,
+            embedder,
             std::mem::take(&mut chunks),
             embedder_name,
             possible_embedding_mistakes,
+            embedder_stats,
             unused_vectors_distribution,
             request_threads,
         )?;
@@ -760,10 +1055,11 @@
     if !current_chunk.is_empty() {
         let embeds = embed_chunks(
-            &embedder,
+            embedder,
             vec![std::mem::take(&mut current_chunk)],
             embedder_name,
             possible_embedding_mistakes,
+            embedder_stats,
             unused_vectors_distribution,
             request_threads,
         )?;
@@ -783,10 +1079,11 @@ fn embed_chunks(
     text_chunks: Vec<Vec<String>>,
     embedder_name: &str,
     possible_embedding_mistakes: &PossibleEmbeddingMistakes,
+    embedder_stats: &EmbedderStats,
     unused_vectors_distribution: &UnusedVectorsDistribution,
     request_threads: &ThreadPoolNoAbort,
 ) -> Result<Vec<Vec<Embedding>>> {
-    match embedder.embed_index(text_chunks, request_threads) {
+    match embedder.embed_index(text_chunks, request_threads, embedder_stats) {
         Ok(chunks) => Ok(chunks),
         Err(error) => {
             if let FaultSource::Bug = error.fault {
@@ -831,3 +1128,175 @@
         }
     }
 }
+
+#[allow(clippy::too_many_arguments)]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
+pub fn extract_embeddings_from_fragments<R: io::Read + io::Seek>(
+    // (docid, extractor_id) -> (Option<Value>)
+    inputs_reader: grenad::Reader<R>,
+    indexer: GrenadParameters,
+    runtime: Arc<RuntimeEmbedder>,
+    embedder_name: &str,
+    possible_embedding_mistakes: &PossibleEmbeddingMistakes,
+    embedder_stats: &EmbedderStats,
+    unused_vectors_distribution: &UnusedVectorsDistribution,
+    request_threads: &ThreadPoolNoAbort,
+) -> Result<grenad::Reader<BufReader<File>>> {
+    let doc_alloc = Bump::new();
+
+    // (docid, extractor_id) -> (Option<Embedding>)
+    let vector_writer = create_writer(
+        indexer.chunk_compression_type,
+        indexer.chunk_compression_level,
+        tempfile::tempfile()?,
+    );
+
+    if inputs_reader.is_empty() {
+        return writer_into_reader(vector_writer);
+    }
+
+    let on_embed = WriteGrenadOnEmbed {
+        waiting_responses: Default::default(),
+        vector_writer,
+        scratch: Default::default(),
+        possible_embedding_mistakes,
+    };
+
+    let mut session = EmbedSession::new(
+        &runtime.embedder,
+        embedder_name,
+        request_threads,
+        &doc_alloc,
+        embedder_stats,
+        on_embed,
+    );
+
+    let mut cursor = inputs_reader.into_cursor()?;
+
+    while let Some((mut key, value)) = cursor.move_on_next()? {
+        let docid = key.read_u32::<BigEndian>().unwrap();
+        let extractor_id = key.read_u8().unwrap();
+
+        if value.is_empty() {
+            // no value => removed fragment
+            session.on_embed_mut().push_response(docid, extractor_id);
+        } else {
+            // unwrap: the grenad value was saved as a serde_json::Value
+            let value: Value = serde_json::from_slice(value).unwrap();
+            session.request_embedding(
+                Metadata { docid, external_docid: "", extractor_id },
+                value,
+                unused_vectors_distribution,
+            )?;
+        }
+    }
+
+    // send last chunk
+    let on_embed = session.drain(unused_vectors_distribution)?;
+    on_embed.finish()
+}
+
+struct WriteGrenadOnEmbed<'a> {
+    // list of (document_id, extractor_id) for which vectors should be removed.
+    // these are written whenever a response arrives that has a larger (docid, extractor_id).
+    waiting_responses: VecDeque<(DocumentId, u8)>,
+
+    // grenad of (docid, extractor_id) -> (Option<Embedding>)
+    vector_writer: Writer<BufWriter<File>>,
+
+    possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
+
+    // scratch buffer used to write keys
+    scratch: Vec<u8>,
+}
+
+impl WriteGrenadOnEmbed<'_> {
+    pub fn push_response(&mut self, docid: DocumentId, extractor_id: u8) {
+        self.waiting_responses.push_back((docid, extractor_id));
+    }
+
+    pub fn finish(mut self) -> Result<grenad::Reader<BufReader<File>>> {
+        for (docid, extractor_id) in self.waiting_responses {
+            self.scratch.clear();
+            self.scratch.write_u32::<BigEndian>(docid).unwrap();
+            self.scratch.write_u8(extractor_id).unwrap();
+            self.vector_writer.insert(&self.scratch, []).unwrap();
+        }
+        writer_into_reader(self.vector_writer)
+    }
+}
+
+impl<'doc> OnEmbed<'doc> for WriteGrenadOnEmbed<'_> {
+    type ErrorMetadata = UnusedVectorsDistribution;
+    fn process_embedding_response(
+        &mut self,
+        response: crate::vector::session::EmbeddingResponse<'doc>,
+    ) {
+        let (docid, extractor_id) = (response.metadata.docid, response.metadata.extractor_id);
+        while let Some(waiting_response) = self.waiting_responses.pop_front() {
+            if (docid, extractor_id) > waiting_response {
+                self.scratch.clear();
+                self.scratch.write_u32::<BigEndian>(docid).unwrap();
+                self.scratch.write_u8(extractor_id).unwrap();
+                self.vector_writer.insert(&self.scratch, []).unwrap();
+            } else {
+                self.waiting_responses.push_front(waiting_response);
+                break;
+            }
+        }
+
+        if let Some(embedding) = response.embedding {
+            self.scratch.clear();
+            self.scratch.write_u32::<BigEndian>(docid).unwrap();
+            self.scratch.write_u8(extractor_id).unwrap();
+            self.vector_writer.insert(&self.scratch, cast_slice(embedding.as_slice())).unwrap();
+        }
+    }
+
+    fn process_embedding_error(
+        &mut self,
+        error: crate::vector::error::EmbedError,
+        embedder_name: &'doc str,
+        unused_vectors_distribution: &crate::vector::error::UnusedVectorsDistribution,
+        _metadata: bumpalo::collections::Vec<'doc, crate::vector::session::Metadata<'doc>>,
+    ) -> crate::Error {
+        if let FaultSource::Bug = error.fault {
+            crate::Error::InternalError(crate::InternalError::VectorEmbeddingError(error.into()))
+        } else {
+            let mut msg =
+                format!(r"While embedding documents for embedder `{embedder_name}`: {error}");
+
+            if let EmbedErrorKind::ManualEmbed(_) = &error.kind {
+                msg += &format!("\n- Note: `{embedder_name}` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.{embedder_name}`.");
+            }
+
+            let mut hint_count = 0;
+
+            for (vector_misspelling, count) in
+                self.possible_embedding_mistakes.vector_mistakes().take(2)
+            {
+                msg += &format!("\n- Hint: try replacing `{vector_misspelling}` by `_vectors` in {count} document(s).");
+                hint_count += 1;
+            }
+
+            for (embedder_misspelling, count) in self
+                .possible_embedding_mistakes
+                .embedder_mistakes(embedder_name, unused_vectors_distribution)
+                .take(2)
+            {
+                msg += &format!("\n- Hint: try replacing `_vectors.{embedder_misspelling}` by `_vectors.{embedder_name}` in {count} document(s).");
+                hint_count += 1;
+            }
+
+            if hint_count == 0 {
+                if let EmbedErrorKind::ManualEmbed(_) = &error.kind {
+                    msg += &format!(
+                        "\n- Hint: opt-out for a document with `_vectors.{embedder_name}: null`"
+                    );
+                }
+            }
+
+            crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg))
+        }
+    }
+}
diff --git a/crates/milli/src/update/index_documents/extract/mod.rs b/crates/milli/src/update/index_documents/extract/mod.rs
index 8cd664a2f..b41fd59e1 100644
--- a/crates/milli/src/update/index_documents/extract/mod.rs
+++ b/crates/milli/src/update/index_documents/extract/mod.rs
@@ -23,15 +23,17 @@
 use self::extract_fid_docid_facet_values::{extract_fid_docid_facet_values, ExtractedFacetValues};
 use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
 use self::extract_geo_points::extract_geo_points;
 use self::extract_vector_points::{
-    extract_embeddings, extract_vector_points, ExtractedVectorPoints,
+    extract_embeddings_from_prompts, extract_vector_points, ExtractedVectorPoints,
 };
 use self::extract_word_docids::extract_word_docids;
 use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
 use self::extract_word_position_docids::extract_word_position_docids;
 use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
 use super::{helpers, TypedChunk};
-use crate::index::IndexEmbeddingConfig;
+use crate::progress::EmbedderStats;
+use crate::update::index_documents::extract::extract_vector_points::extract_embeddings_from_fragments;
 use crate::update::settings::InnerIndexSettingsDiff;
+use crate::vector::db::EmbedderInfo;
 use crate::vector::error::PossibleEmbeddingMistakes;
 use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
 
@@ -45,10 +47,11 @@ pub(crate) fn data_from_obkv_documents(
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     primary_key_id: FieldId,
-    embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
     settings_diff: Arc<InnerIndexSettingsDiff>,
     max_positions_per_attributes: Option<u32>,
+    embedder_info: Arc<Vec<(String, EmbedderInfo)>>,
     possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
+    embedder_stats: &Arc<EmbedderStats>,
 ) -> Result<()> {
     let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
         || {
@@ -59,9 +62,10 @@ pub(crate) fn data_from_obkv_documents(
                         original_documents_chunk,
                         indexer,
                         lmdb_writer_sx.clone(),
-                        embedders_configs.clone(),
                         settings_diff.clone(),
+                        embedder_info.clone(),
                         possible_embedding_mistakes.clone(),
+                        embedder_stats.clone(),
                     )
                 })
                 .collect::<Result<()>>()
@@ -210,7 +214,7 @@ fn run_extraction_task(
     })
 }
 
-fn request_threads() -> &'static ThreadPoolNoAbort {
+pub fn request_threads() -> &'static ThreadPoolNoAbort {
     static REQUEST_THREADS: OnceLock<ThreadPoolNoAbort> = OnceLock::new();
 
     REQUEST_THREADS.get_or_init(|| {
@@ -228,20 +232,20 @@ fn send_original_documents_data(
     original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
-    embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
     settings_diff: Arc<InnerIndexSettingsDiff>,
+    embedder_info: Arc<Vec<(String, EmbedderInfo)>>,
     possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
+    embedder_stats: Arc<EmbedderStats>,
 ) -> Result<()> {
     let original_documents_chunk =
         original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
 
     let index_vectors = (settings_diff.reindex_vectors() || !settings_diff.settings_update_only())
         // no point in indexing vectors without embedders
-        && (!settings_diff.new.embedding_configs.inner_as_ref().is_empty());
+        && (!settings_diff.new.runtime_embedders.inner_as_ref().is_empty());
 
     if index_vectors {
         let settings_diff = settings_diff.clone();
-        let embedders_configs = embedders_configs.clone();
         let original_documents_chunk = original_documents_chunk.clone();
         let lmdb_writer_sx = lmdb_writer_sx.clone();
 
@@ -249,8 +253,8 @@ fn send_original_documents_data(
             match extract_vector_points(
                 original_documents_chunk.clone(),
                 indexer,
-                &embedders_configs,
                 &settings_diff,
+                embedder_info.as_slice(),
                 &possible_embedding_mistakes,
             ) {
                 Ok((extracted_vectors, unused_vectors_distribution)) => {
@@ -258,18 +262,19 @@ fn send_original_documents_data(
                     for ExtractedVectorPoints {
                         manual_vectors,
                         remove_vectors,
                         prompts,
+                        inputs,
                         embedder_name,
-                        embedder,
-                        add_to_user_provided,
-                        remove_from_user_provided,
+                        runtime,
+                        embedding_status_delta,
                     } in
extracted_vectors { - let embeddings = match extract_embeddings( + let embeddings_from_prompts = match extract_embeddings_from_prompts( prompts, indexer, - embedder.clone(), + runtime.clone(), &embedder_name, &possible_embedding_mistakes, + &embedder_stats, &unused_vectors_distribution, request_threads(), ) { @@ -279,18 +284,37 @@ fn send_original_documents_data( None } }; + + let embeddings_from_fragments = match extract_embeddings_from_fragments( + inputs, + indexer, + runtime.clone(), + &embedder_name, + &possible_embedding_mistakes, + &embedder_stats, + &unused_vectors_distribution, + request_threads(), + ) { + Ok(results) => Some(results), + Err(error) => { + let _ = lmdb_writer_sx.send(Err(error)); + None + } + }; + if !(remove_vectors.is_empty() && manual_vectors.is_empty() - && embeddings.as_ref().is_none_or(|e| e.is_empty())) + && embeddings_from_prompts.as_ref().is_none_or(|e| e.is_empty()) + && embeddings_from_fragments.as_ref().is_none_or(|e| e.is_empty())) { let _ = lmdb_writer_sx.send(Ok(TypedChunk::VectorPoints { remove_vectors, - embeddings, - expected_dimension: embedder.dimensions(), + embeddings_from_prompts, + embeddings_from_fragments, + expected_dimension: runtime.embedder.dimensions(), manual_vectors, embedder_name, - add_to_user_provided, - remove_from_user_provided, + embedding_status_delta, })); } } diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 379b991e0..658ff1923 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -12,6 +12,7 @@ use std::sync::Arc; use crossbeam_channel::{Receiver, Sender}; use enrich::enrich_documents_batch; +pub use extract::request_threads; use grenad::{Merger, MergerBuilder}; use hashbrown::HashMap; use heed::types::Str; @@ -32,12 +33,13 @@ use crate::database_stats::DatabaseStats; use crate::documents::{obkv_to_object, DocumentsBatchReader}; use crate::error::{Error, InternalError}; use crate::index::{PrefixSearch, PrefixSettings}; -use crate::progress::Progress; +use crate::progress::{EmbedderStats, Progress}; pub use crate::update::index_documents::helpers::CursorClonableMmap; use crate::update::{ IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, }; -use crate::vector::{ArroyWrapper, EmbeddingConfigs}; +use crate::vector::db::EmbedderInfo; +use crate::vector::{ArroyWrapper, RuntimeEmbedders}; use crate::{CboRoaringBitmapCodec, Index, Result, UserError}; static MERGED_DATABASE_COUNT: usize = 7; @@ -80,7 +82,8 @@ pub struct IndexDocuments<'t, 'i, 'a, FP, FA> { should_abort: FA, added_documents: u64, deleted_documents: u64, - embedders: EmbeddingConfigs, + embedders: RuntimeEmbedders, + embedder_stats: &'t Arc, } #[derive(Default, Debug, Clone)] @@ -103,6 +106,7 @@ where config: IndexDocumentsConfig, progress: FP, should_abort: FA, + embedder_stats: &'t Arc, ) -> Result> { let transform = Some(Transform::new( wtxn, @@ -123,6 +127,7 @@ where added_documents: 0, deleted_documents: 0, embedders: Default::default(), + embedder_stats, }) } @@ -168,7 +173,7 @@ where Ok((self, Ok(indexed_documents))) } - pub fn with_embedders(mut self, embedders: EmbeddingConfigs) -> Self { + pub fn with_embedders(mut self, embedders: RuntimeEmbedders) -> Self { self.embedders = embedders; self } @@ -222,7 +227,13 @@ where settings_diff.new.recompute_searchables(self.wtxn, self.index)?; let settings_diff = Arc::new(settings_diff); - let embedders_configs = 
Arc::new(self.index.embedding_configs(self.wtxn)?); + let embedder_infos: heed::Result> = self + .index + .embedding_configs() + .iter_embedder_info(self.wtxn)? + .map(|res| res.map(|(name, info)| (name.to_owned(), info))) + .collect(); + let embedder_infos = Arc::new(embedder_infos?); let possible_embedding_mistakes = crate::vector::error::PossibleEmbeddingMistakes::new(&field_distribution); @@ -292,6 +303,7 @@ where // Run extraction pipeline in parallel. let mut modified_docids = RoaringBitmap::new(); + let embedder_stats = self.embedder_stats.clone(); pool.install(|| { let settings_diff_cloned = settings_diff.clone(); rayon::spawn(move || { @@ -323,10 +335,11 @@ where pool_params, lmdb_writer_sx.clone(), primary_key_id, - embedders_configs.clone(), settings_diff_cloned, max_positions_per_attributes, - Arc::new(possible_embedding_mistakes) + embedder_infos, + Arc::new(possible_embedding_mistakes), + &embedder_stats ) }); @@ -424,21 +437,21 @@ where TypedChunk::VectorPoints { expected_dimension, remove_vectors, - embeddings, + embeddings_from_prompts, + embeddings_from_fragments, manual_vectors, embedder_name, - add_to_user_provided, - remove_from_user_provided, + embedding_status_delta, } => { dimension.insert(embedder_name.clone(), expected_dimension); TypedChunk::VectorPoints { remove_vectors, - embeddings, + embeddings_from_prompts, + embeddings_from_fragments, expected_dimension, manual_vectors, embedder_name, - add_to_user_provided, - remove_from_user_provided, + embedding_status_delta, } } otherwise => otherwise, @@ -474,7 +487,7 @@ where // we should insert it in `dimension` for (name, action) in settings_diff.embedding_config_updates.iter() { if action.is_being_quantized && !dimension.contains_key(name.as_str()) { - let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or( + let index = self.index.embedding_configs().embedder_id(self.wtxn, name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None, @@ -482,7 +495,9 @@ where )?; let reader = ArroyWrapper::new(self.index.vector_arroy, index, action.was_quantized); - let dim = reader.dimensions(self.wtxn)?; + let Some(dim) = reader.dimensions(self.wtxn)? 
else { + continue; + }; dimension.insert(name.to_string(), dim); } } @@ -492,12 +507,19 @@ where let vector_arroy = self.index.vector_arroy; let cancel = &self.should_abort; - let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( - InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, - )?; + let embedder_index = + self.index.embedding_configs().embedder_id(wtxn, &embedder_name)?.ok_or( + InternalError::DatabaseMissingEntry { + db_name: "embedder_category_id", + key: None, + }, + )?; let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name); - let was_quantized = - settings_diff.old.embedding_configs.get(&embedder_name).is_some_and(|conf| conf.2); + let was_quantized = settings_diff + .old + .runtime_embedders + .get(&embedder_name) + .is_some_and(|conf| conf.is_quantized); let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized); pool.install(|| { @@ -767,11 +789,11 @@ mod tests { use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::documents::mmap_from_objects; use crate::index::tests::TempIndex; - use crate::index::IndexEmbeddingConfig; use crate::progress::Progress; use crate::search::TermsMatchingStrategy; use crate::update::new::indexer; use crate::update::Setting; + use crate::vector::db::IndexEmbeddingConfig; use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError}; #[test] @@ -1562,12 +1584,12 @@ mod tests { let rtxn = index.read_txn().unwrap(); // Only the first document should match. - let count = index.word_docids.get(&rtxn, "huàzhuāngbāo").unwrap().unwrap().len(); + let count = index.word_docids.get(&rtxn, "huàzhuāng").unwrap().unwrap().len(); assert_eq!(count, 1); // Only the second document should match. 
let count = index.word_docids.get(&rtxn, "bāo").unwrap().unwrap().len(); - assert_eq!(count, 1); + assert_eq!(count, 2); let mut search = crate::Search::new(&rtxn, &index); search.query("化妆包"); @@ -2022,9 +2044,10 @@ mod tests { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2109,9 +2132,10 @@ mod tests { new_fields_ids_map, primary_key, &document_changes, - EmbeddingConfigs::default(), + RuntimeEmbedders::default(), &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2269,7 +2293,7 @@ mod tests { ]); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); indexer.replace_documents(&documents).unwrap(); indexer.delete_documents(&["2"]); @@ -2297,6 +2321,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2334,7 +2359,7 @@ mod tests { indexer.delete_documents(&["1", "2"]); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( &indexer_alloc, @@ -2359,6 +2384,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2384,7 +2410,7 @@ mod tests { { "id": 3, "name": "jean", "age": 25 }, ]); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); indexer.update_documents(&documents).unwrap(); @@ -2412,6 +2438,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2435,7 +2462,7 @@ mod tests { { "id": 3, "legs": 4 }, ]); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); indexer.update_documents(&documents).unwrap(); indexer.delete_documents(&["1", "2"]); @@ -2464,6 +2491,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2484,7 +2512,7 @@ mod tests { let mut new_fields_ids_map = db_fields_ids_map.clone(); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1", "2"]); @@ -2518,6 +2546,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2539,7 +2568,7 @@ mod tests { let mut new_fields_ids_map = db_fields_ids_map.clone(); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1", "2", "1", "2"]); @@ -2577,6 +2606,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2597,7 +2627,7 @@ mod tests { let mut new_fields_ids_map = db_fields_ids_map.clone(); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = 
RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); let documents = documents!([ @@ -2629,6 +2659,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2646,7 +2677,7 @@ mod tests { let mut new_fields_ids_map = db_fields_ids_map.clone(); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1"]); @@ -2681,6 +2712,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2759,6 +2791,8 @@ mod tests { document_template: Setting::NotSet, document_template_max_bytes: Setting::NotSet, url: Setting::NotSet, + indexing_fragments: Setting::NotSet, + search_fragments: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, distribution: Setting::NotSet, @@ -2785,17 +2819,27 @@ mod tests { .unwrap(); let rtxn = index.read_txn().unwrap(); - let mut embedding_configs = index.embedding_configs(&rtxn).unwrap(); - let IndexEmbeddingConfig { name: embedder_name, config: embedder, user_provided } = + let embedders = index.embedding_configs(); + let mut embedding_configs = embedders.embedding_configs(&rtxn).unwrap(); + let IndexEmbeddingConfig { name: embedder_name, config: embedder, fragments } = embedding_configs.pop().unwrap(); + let info = embedders.embedder_info(&rtxn, &embedder_name).unwrap().unwrap(); + insta::assert_snapshot!(info.embedder_id, @"0"); + insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0, 1, 2]>"); + insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0, 1, 2]>"); insta::assert_snapshot!(embedder_name, @"manual"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[0, 1, 2]>"); + insta::assert_debug_snapshot!(fragments, @r###" + FragmentConfigs( + [], + ) + "###); + let embedder = std::sync::Arc::new( crate::vector::Embedder::new(embedder.embedder_options, 0).unwrap(), ); let res = index .search(&rtxn) - .semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec())) + .semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()), None) .execute() .unwrap(); assert_eq!(res.documents_ids.len(), 3); @@ -2844,7 +2888,7 @@ mod tests { let mut new_fields_ids_map = db_fields_ids_map.clone(); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); // OP @@ -2879,6 +2923,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2904,7 +2949,7 @@ mod tests { let mut new_fields_ids_map = db_fields_ids_map.clone(); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1"]); @@ -2938,6 +2983,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2962,7 +3008,7 @@ mod tests { let mut new_fields_ids_map = db_fields_ids_map.clone(); let indexer_alloc = Bump::new(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); let documents = documents!([ @@ 
-2994,6 +3040,7 @@ mod tests { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); wtxn.commit().unwrap(); diff --git a/crates/milli/src/update/index_documents/transform.rs b/crates/milli/src/update/index_documents/transform.rs index e17625ad4..e07483aff 100644 --- a/crates/milli/src/update/index_documents/transform.rs +++ b/crates/milli/src/update/index_documents/transform.rs @@ -31,7 +31,7 @@ use crate::update::index_documents::GrenadParameters; use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableIds, UpdateIndexingStep}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; -use crate::vector::settings::WriteBackToDocuments; +use crate::vector::settings::{RemoveFragments, WriteBackToDocuments}; use crate::vector::ArroyWrapper; use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result}; @@ -933,10 +933,47 @@ impl<'a, 'i> Transform<'a, 'i> { // delete all vectors from the embedders that need removal for (_, (reader, _)) in readers { - let dimensions = reader.dimensions(wtxn)?; + let Some(dimensions) = reader.dimensions(wtxn)? else { + continue; + }; reader.clear(wtxn, dimensions)?; } + // remove all vectors for the specified fragments + for (embedder_name, RemoveFragments { fragment_ids }, was_quantized) in + settings_diff.embedding_config_updates.iter().filter_map(|(name, action)| { + action.remove_fragments().map(|fragments| (name, fragments, action.was_quantized)) + }) + { + let Some(infos) = self.index.embedding_configs().embedder_info(wtxn, embedder_name)? + else { + continue; + }; + let arroy = + ArroyWrapper::new(self.index.vector_arroy, infos.embedder_id, was_quantized); + let Some(dimensions) = arroy.dimensions(wtxn)? 
else { + continue; + }; + for fragment_id in fragment_ids { + // we must keep the user provided embeddings that ended up in this store + + if infos.embedding_status.user_provided_docids().is_empty() { + // no user provided: clear store + arroy.clear_store(wtxn, *fragment_id, dimensions)?; + continue; + } + + // some user provided, remove only the ids that are not user provided + let to_delete = arroy.items_in_store(wtxn, *fragment_id, |items| { + items - infos.embedding_status.user_provided_docids() + })?; + + for to_delete in to_delete { + arroy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?; + } + } + } + let grenad_params = GrenadParameters { chunk_compression_type: self.indexer_settings.chunk_compression_type, chunk_compression_level: self.indexer_settings.chunk_compression_level, diff --git a/crates/milli/src/update/index_documents/typed_chunk.rs b/crates/milli/src/update/index_documents/typed_chunk.rs index 6d575a98b..c93e3e0f7 100644 --- a/crates/milli/src/update/index_documents/typed_chunk.rs +++ b/crates/milli/src/update/index_documents/typed_chunk.rs @@ -4,6 +4,7 @@ use std::fs::File; use std::io::{self, BufReader}; use bytemuck::allocation::pod_collect_to_vec; +use byteorder::{BigEndian, ReadBytesExt as _}; use grenad::{MergeFunction, Merger, MergerBuilder}; use heed::types::Bytes; use heed::{BytesDecode, RwTxn}; @@ -18,7 +19,6 @@ use super::helpers::{ use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind}; use crate::facet::FacetType; use crate::index::db_name::DOCUMENTS; -use crate::index::IndexEmbeddingConfig; use crate::proximity::MAX_DISTANCE; use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd}; use crate::update::facet::FacetsUpdate; @@ -26,6 +26,7 @@ use crate::update::index_documents::helpers::{ as_cloneable_grenad, try_split_array_at, KeepLatestObkv, }; use crate::update::settings::InnerIndexSettingsDiff; +use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig}; use crate::vector::ArroyWrapper; use crate::{ lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, @@ -86,12 +87,14 @@ pub(crate) enum TypedChunk { GeoPoints(grenad::Reader>), VectorPoints { remove_vectors: grenad::Reader>, - embeddings: Option>>, + // docid -> vector + embeddings_from_prompts: Option>>, + // docid, extractor_id -> Option, + embeddings_from_fragments: Option>>, expected_dimension: usize, manual_vectors: grenad::Reader>, embedder_name: String, - add_to_user_provided: RoaringBitmap, - remove_from_user_provided: RoaringBitmap, + embedding_status_delta: EmbeddingStatusDelta, }, } @@ -155,6 +158,7 @@ pub(crate) fn write_typed_chunk_into_index( let mut iter = merger.into_stream_merger_iter()?; let embedders: BTreeSet<_> = index + .embedding_configs() .embedding_configs(wtxn)? .into_iter() .map(|IndexEmbeddingConfig { name, .. 
}| name) @@ -614,57 +618,66 @@ pub(crate) fn write_typed_chunk_into_index( let span = tracing::trace_span!(target: "indexing::write_db", "vector_points"); let _entered = span.enter(); + let embedders = index.embedding_configs(); + let mut remove_vectors_builder = MergerBuilder::new(KeepFirst); let mut manual_vectors_builder = MergerBuilder::new(KeepFirst); - let mut embeddings_builder = MergerBuilder::new(KeepFirst); - let mut add_to_user_provided = RoaringBitmap::new(); - let mut remove_from_user_provided = RoaringBitmap::new(); + let mut embeddings_from_prompts_builder = MergerBuilder::new(KeepFirst); + let mut embeddings_from_fragments_builder = MergerBuilder::new(KeepFirst); let mut params = None; + let mut infos = None; for typed_chunk in typed_chunks { let TypedChunk::VectorPoints { remove_vectors, manual_vectors, - embeddings, + embeddings_from_prompts, + embeddings_from_fragments, expected_dimension, embedder_name, - add_to_user_provided: aud, - remove_from_user_provided: rud, + embedding_status_delta, } = typed_chunk else { unreachable!(); }; + if infos.is_none() { + infos = Some(embedders.embedder_info(wtxn, &embedder_name)?.ok_or( + InternalError::DatabaseMissingEntry { + db_name: "embedder_category_id", + key: None, + }, + )?); + } + params = Some((expected_dimension, embedder_name)); remove_vectors_builder.push(remove_vectors.into_cursor()?); manual_vectors_builder.push(manual_vectors.into_cursor()?); - if let Some(embeddings) = embeddings { - embeddings_builder.push(embeddings.into_cursor()?); + if let Some(embeddings) = embeddings_from_prompts { + embeddings_from_prompts_builder.push(embeddings.into_cursor()?); + } + if let Some(embeddings) = embeddings_from_fragments { + embeddings_from_fragments_builder.push(embeddings.into_cursor()?); + } + + if let Some(infos) = &mut infos { + embedding_status_delta.apply_to(&mut infos.embedding_status); } - add_to_user_provided |= aud; - remove_from_user_provided |= rud; } // typed chunks has always at least 1 chunk. let Some((expected_dimension, embedder_name)) = params else { unreachable!() }; + let Some(infos) = infos else { unreachable!() }; - let mut embedding_configs = index.embedding_configs(wtxn)?; - let index_embedder_config = embedding_configs - .iter_mut() - .find(|IndexEmbeddingConfig { name, .. 
}| name == &embedder_name) - .unwrap(); - index_embedder_config.user_provided -= remove_from_user_provided; - index_embedder_config.user_provided |= add_to_user_provided; + embedders.put_embedder_info(wtxn, &embedder_name, &infos)?; - index.put_embedding_configs(wtxn, embedding_configs)?; - - let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( - InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, - )?; - let binary_quantized = - settings_diff.old.embedding_configs.get(&embedder_name).is_some_and(|conf| conf.2); + let binary_quantized = settings_diff + .old + .runtime_embedders + .get(&embedder_name) + .is_some_and(|conf| conf.is_quantized); // FIXME: allow customizing distance - let writer = ArroyWrapper::new(index.vector_arroy, embedder_index, binary_quantized); + let writer = ArroyWrapper::new(index.vector_arroy, infos.embedder_id, binary_quantized); // remove vectors for docids we want them removed let merger = remove_vectors_builder.build(); @@ -674,8 +687,8 @@ pub(crate) fn write_typed_chunk_into_index( writer.del_items(wtxn, expected_dimension, docid)?; } - // add generated embeddings - let merger = embeddings_builder.build(); + // add generated embeddings -- from prompts + let merger = embeddings_from_prompts_builder.build(); let mut iter = merger.into_stream_merger_iter()?; while let Some((key, value)) = iter.next()? { let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap(); @@ -702,6 +715,24 @@ pub(crate) fn write_typed_chunk_into_index( writer.add_items(wtxn, docid, &embeddings)?; } + // add generated embeddings -- from fragments + let merger = embeddings_from_fragments_builder.build(); + let mut iter = merger.into_stream_merger_iter()?; + while let Some((mut key, value)) = iter.next()? 
{ + let docid = key.read_u32::().unwrap(); + let extractor_id = key.read_u8().unwrap(); + if value.is_empty() { + writer.del_item_in_store(wtxn, docid, extractor_id, expected_dimension)?; + } else { + let data = pod_collect_to_vec(value); + // it is a code error to have embeddings and not expected_dimension + if data.len() != expected_dimension { + panic!("wrong dimensions") + } + writer.add_item_in_store(wtxn, docid, extractor_id, &data)?; + } + } + // perform the manual diff let merger = manual_vectors_builder.build(); let mut iter = merger.into_stream_merger_iter()?; diff --git a/crates/milli/src/update/indexer_config.rs b/crates/milli/src/update/indexer_config.rs index eb7fbd4d5..a0f901818 100644 --- a/crates/milli/src/update/indexer_config.rs +++ b/crates/milli/src/update/indexer_config.rs @@ -15,6 +15,7 @@ pub struct IndexerConfig { pub thread_pool: ThreadPoolNoAbort, pub max_positions_per_attributes: Option, pub skip_index_budget: bool, + pub experimental_no_edition_2024_for_settings: bool, } impl IndexerConfig { @@ -63,6 +64,7 @@ impl Default for IndexerConfig { chunk_compression_level: None, max_positions_per_attributes: None, skip_index_budget: false, + experimental_no_edition_2024_for_settings: false, } } } diff --git a/crates/milli/src/update/mod.rs b/crates/milli/src/update/mod.rs index ebb313dcf..64eb9f1d3 100644 --- a/crates/milli/src/update/mod.rs +++ b/crates/milli/src/update/mod.rs @@ -1,9 +1,10 @@ pub use self::available_ids::AvailableIds; +pub use self::chat::ChatSettings; pub use self::clear_documents::ClearDocuments; pub use self::concurrent_available_ids::ConcurrentAvailableIds; pub use self::facet::bulk::FacetsUpdateBulk; pub use self::facet::incremental::FacetsUpdateIncrementalInner; -pub use self::index_documents::*; +pub use self::index_documents::{request_threads, *}; pub use self::indexer_config::{default_thread_pool_and_threads, IndexerConfig}; pub use self::new::ChannelCongestion; pub use self::settings::{validate_embedding_settings, Setting, Settings}; @@ -13,6 +14,7 @@ pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids; pub use self::words_prefixes_fst::WordsPrefixesFst; mod available_ids; +mod chat; mod clear_documents; mod concurrent_available_ids; pub(crate) mod del_add; diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 4fff31a35..aec192ace 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -138,6 +138,7 @@ pub enum ReceiverAction { WakeUp, LargeEntry(LargeEntry), LargeVectors(LargeVectors), + LargeVector(LargeVector), } /// An entry that cannot fit in the BBQueue buffers has been @@ -174,6 +175,24 @@ impl LargeVectors { } } +#[derive(Debug)] +pub struct LargeVector { + /// The document id associated to the large embedding. + pub docid: DocumentId, + /// The embedder id in which to insert the large embedding. + pub embedder_id: u8, + /// The extractor id in which to insert the large embedding. + pub extractor_id: u8, + /// The large embedding that must be written. + pub embedding: Mmap, +} + +impl LargeVector { + pub fn read_embedding(&self, dimensions: usize) -> &[f32] { + self.embedding.chunks_exact(dimensions).map(bytemuck::cast_slice).next().unwrap() + } +} + impl<'a> WriterBbqueueReceiver<'a> { /// Tries to receive an action to do until the timeout occurs /// and if it does, consider it as a spurious wake up. 
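Both the fragment inputs grenad and the vector writer above key their entries by `(docid, extractor_id)`, serialized as a big-endian `u32` followed by a `u8` (see the `read_u32::<BigEndian>()` / `read_u8()` and matching `write_u32::<BigEndian>()` / `write_u8()` calls in this diff). A minimal standalone sketch of that layout follows; `encode_key`/`decode_key` are hypothetical helper names for illustration, not functions from the diff. Big-endian matters here: it makes the lexicographic order of the raw key bytes identical to the numeric order of the `(docid, extractor_id)` pairs, which is what lets grenad's sorted merge and the `waiting_responses` queue compare keys correctly.

```rust
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};

/// Hypothetical helper: serialize (docid, extractor_id) as the 5-byte key
/// used by the fragment pipeline (u32 big-endian docid, then u8 extractor id).
fn encode_key(docid: u32, extractor_id: u8) -> Vec<u8> {
    let mut key = Vec::with_capacity(5);
    key.write_u32::<BigEndian>(docid).unwrap();
    key.write_u8(extractor_id).unwrap();
    key
}

/// Hypothetical helper: the inverse of `encode_key`.
fn decode_key(mut key: &[u8]) -> (u32, u8) {
    let docid = key.read_u32::<BigEndian>().unwrap();
    let extractor_id = key.read_u8().unwrap();
    (docid, extractor_id)
}

fn main() {
    // Byte-wise (lexicographic) order of encoded keys matches the numeric
    // order of the tuples, so sorted readers stay sorted by (docid, extractor_id).
    assert!(encode_key(1, 7) < encode_key(2, 0));
    assert!(encode_key(2, 0) < encode_key(2, 1));
    assert_eq!(decode_key(&encode_key(42, 3)), (42, 3));
}
```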
@@ -238,6 +257,7 @@ pub enum EntryHeader {
     DbOperation(DbOperation),
     ArroyDeleteVector(ArroyDeleteVector),
     ArroySetVectors(ArroySetVectors),
+    ArroySetVector(ArroySetVector),
 }
 
 impl EntryHeader {
@@ -250,6 +270,7 @@ impl EntryHeader {
             EntryHeader::DbOperation(_) => 0,
             EntryHeader::ArroyDeleteVector(_) => 1,
             EntryHeader::ArroySetVectors(_) => 2,
+            EntryHeader::ArroySetVector(_) => 3,
         }
     }
 
@@ -274,11 +295,17 @@
         Self::variant_size() + mem::size_of::<ArroySetVectors>() + embedding_size * count
     }
 
+    fn total_set_vector_size(dimensions: usize) -> usize {
+        let embedding_size = dimensions * mem::size_of::<f32>();
+        Self::variant_size() + mem::size_of::<ArroySetVector>() + embedding_size
+    }
+
     fn header_size(&self) -> usize {
         let payload_size = match self {
             EntryHeader::DbOperation(op) => mem::size_of_val(op),
             EntryHeader::ArroyDeleteVector(adv) => mem::size_of_val(adv),
             EntryHeader::ArroySetVectors(asvs) => mem::size_of_val(asvs),
+            EntryHeader::ArroySetVector(asv) => mem::size_of_val(asv),
         };
         Self::variant_size() + payload_size
     }
@@ -301,6 +328,11 @@
                 let header = checked::pod_read_unaligned(header_bytes);
                 EntryHeader::ArroySetVectors(header)
             }
+            3 => {
+                let header_bytes = &remaining[..mem::size_of::<ArroySetVector>()];
+                let header = checked::pod_read_unaligned(header_bytes);
+                EntryHeader::ArroySetVector(header)
+            }
             id => panic!("invalid variant id: {id}"),
         }
     }
@@ -311,6 +343,7 @@
             EntryHeader::DbOperation(op) => bytemuck::bytes_of(op),
             EntryHeader::ArroyDeleteVector(adv) => bytemuck::bytes_of(adv),
             EntryHeader::ArroySetVectors(asvs) => bytemuck::bytes_of(asvs),
+            EntryHeader::ArroySetVector(asv) => bytemuck::bytes_of(asv),
         };
         *first = self.variant_id();
         remaining.copy_from_slice(payload_bytes);
@@ -379,6 +412,37 @@ impl ArroySetVectors {
     }
 }
 
+#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)]
+#[repr(C)]
+/// The embeddings are in the remaining space and represent
+/// a non-aligned [f32] with `dimensions` f32s.
+pub struct ArroySetVector {
+    pub docid: DocumentId,
+    pub embedder_id: u8,
+    pub extractor_id: u8,
+    _padding: [u8; 2],
+}
+
+impl ArroySetVector {
+    fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] {
+        let skip = EntryHeader::variant_size() + mem::size_of::<ArroySetVector>();
+        &frame[skip..]
+    }
+
+    /// Read the embedding and write it into an aligned `f32` Vec.
+    pub fn read_all_embeddings_into_vec<'v>(
+        &self,
+        frame: &FrameGrantR<'_>,
+        vec: &'v mut Vec<f32>,
+    ) -> &'v [f32] {
+        let embeddings_bytes = Self::embeddings_bytes(frame);
+        let embeddings_count = embeddings_bytes.len() / mem::size_of::<f32>();
+        vec.resize(embeddings_count, 0.0);
+        bytemuck::cast_slice_mut(vec.as_mut()).copy_from_slice(embeddings_bytes);
+        &vec[..]
+ } +} + #[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] #[repr(u16)] pub enum Database { @@ -398,6 +462,7 @@ pub enum Database { FacetIdStringDocids, FieldIdDocidFacetStrings, FieldIdDocidFacetF64s, + VectorEmbedderCategoryId, } impl Database { @@ -419,6 +484,7 @@ impl Database { Database::FacetIdStringDocids => index.facet_id_string_docids.remap_types(), Database::FieldIdDocidFacetStrings => index.field_id_docid_facet_strings.remap_types(), Database::FieldIdDocidFacetF64s => index.field_id_docid_facet_f64s.remap_types(), + Database::VectorEmbedderCategoryId => index.embedder_category_id.remap_types(), } } @@ -440,6 +506,7 @@ impl Database { Database::FacetIdStringDocids => db_name::FACET_ID_STRING_DOCIDS, Database::FieldIdDocidFacetStrings => db_name::FIELD_ID_DOCID_FACET_STRINGS, Database::FieldIdDocidFacetF64s => db_name::FIELD_ID_DOCID_FACET_F64S, + Database::VectorEmbedderCategoryId => db_name::VECTOR_EMBEDDER_CATEGORY_ID, } } } @@ -568,6 +635,82 @@ impl<'b> ExtractorBbqueueSender<'b> { Ok(()) } + fn set_vector_for_extractor( + &self, + docid: u32, + embedder_id: u8, + extractor_id: u8, + embedding: Option, + ) -> crate::Result<()> { + let max_grant = self.max_grant; + let refcell = self.producers.get().unwrap(); + let mut producer = refcell.0.borrow_mut_or_yield(); + + // If there are no vectors we specify the dimensions + // to zero to allocate no extra space at all + let dimensions = embedding.as_ref().map_or(0, |emb| emb.len()); + + let arroy_set_vector = + ArroySetVector { docid, embedder_id, extractor_id, _padding: [0; 2] }; + let payload_header = EntryHeader::ArroySetVector(arroy_set_vector); + let total_length = EntryHeader::total_set_vector_size(dimensions); + if total_length > max_grant { + let mut value_file = tempfile::tempfile().map(BufWriter::new)?; + let embedding = embedding.expect("set_vector without a vector does not fit in RAM"); + + let mut embedding_bytes = bytemuck::cast_slice(&embedding); + io::copy(&mut embedding_bytes, &mut value_file)?; + + let value_file = value_file.into_inner().map_err(|ie| ie.into_error())?; + let embedding = unsafe { Mmap::map(&value_file)? }; + + let large_vectors = LargeVector { docid, embedder_id, extractor_id, embedding }; + self.sender.send(ReceiverAction::LargeVector(large_vectors)).unwrap(); + + return Ok(()); + } + + // Spin loop to have a frame the size we requested. 
+        reserve_and_write_grant(
+            &mut producer,
+            total_length,
+            &self.sender,
+            &self.sent_messages_attempts,
+            &self.blocking_sent_messages_attempts,
+            |grant| {
+                let header_size = payload_header.header_size();
+                let (header_bytes, remaining) = grant.split_at_mut(header_size);
+                payload_header.serialize_into(header_bytes);
+
+                if dimensions != 0 {
+                    let output_iter =
+                        remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>());
+
+                    for (embedding, output) in embedding.iter().zip(output_iter) {
+                        output.copy_from_slice(bytemuck::cast_slice(embedding));
+                    }
+                }
+
+                Ok(())
+            },
+        )?;
+
+        Ok(())
+    }
+
+    fn embedding_status(
+        &self,
+        name: &str,
+        infos: crate::vector::db::EmbedderInfo,
+    ) -> crate::Result<()> {
+        let bytes = infos.to_bytes().map_err(|_| {
+            InternalError::Serialization(crate::SerializationError::Encoding {
+                db_name: Some(Database::VectorEmbedderCategoryId.database_name()),
+            })
+        })?;
+        self.write_key_value(Database::VectorEmbedderCategoryId, name.as_bytes(), &bytes)
+    }
+
     fn write_key_value(&self, database: Database, key: &[u8], value: &[u8]) -> crate::Result<()> {
         let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| {
             InternalError::StorePut {
@@ -942,9 +1085,18 @@ impl EmbeddingSender<'_, '_> {
         &self,
         docid: DocumentId,
         embedder_id: u8,
-        embedding: Embedding,
+        extractor_id: u8,
+        embedding: Option<Embedding>,
     ) -> crate::Result<()> {
-        self.0.set_vectors(docid, embedder_id, &[embedding])
+        self.0.set_vector_for_extractor(docid, embedder_id, extractor_id, embedding)
+    }
+
+    pub(crate) fn embedding_status(
+        &self,
+        name: &str,
+        infos: crate::vector::db::EmbedderInfo,
+    ) -> crate::Result<()> {
+        self.0.embedding_status(name, infos)
     }
 }
 
diff --git a/crates/milli/src/update/new/document.rs b/crates/milli/src/update/new/document.rs
index 1ef44fc8d..d520bb952 100644
--- a/crates/milli/src/update/new/document.rs
+++ b/crates/milli/src/update/new/document.rs
@@ -1,7 +1,10 @@
+use std::cell::{Cell, RefCell};
 use std::collections::{BTreeMap, BTreeSet};
+use std::sync::RwLock;
 
+use bumpalo::Bump;
 use bumparaw_collections::RawMap;
-use heed::RoTxn;
+use heed::{RoTxn, WithoutTls};
 use rustc_hash::FxBuildHasher;
 use serde_json::value::RawValue;
 
@@ -9,7 +12,14 @@
 use super::vector_document::VectorDocument;
 use super::{KvReaderFieldId, KvWriterFieldId};
 use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
 use crate::documents::FieldIdMapper;
-use crate::{DocumentId, GlobalFieldsIdsMap, Index, InternalError, Result, UserError};
+use crate::update::del_add::KvReaderDelAdd;
+use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
+use crate::update::new::vector_document::VectorDocumentFromDb;
+use crate::vector::settings::EmbedderAction;
+use crate::{
+    DocumentId, FieldIdMapWithMetadata, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError,
+    Result, UserError,
+};
 
 /// A view into a document that can represent either the current version from the DB,
 /// the update data from payload or other means, or the merged updated version.
@@ -309,6 +319,7 @@ where
 pub fn write_to_obkv<'s, 'a, 'map, 'buffer>(
     document: &'s impl Document<'s>,
     vector_document: Option<&'s impl VectorDocument<'s>>,
+    embedder_actions: &'a BTreeMap<String, EmbedderAction>,
     fields_ids_map: &'a mut GlobalFieldsIdsMap<'map>,
     mut document_buffer: &'a mut bumpalo::collections::Vec<'buffer, u8>,
 ) -> Result<&'a KvReaderFieldId>
@@ -338,20 +349,39 @@ where
         for res in vector_document.iter_vectors() {
             let (name, entry) = res?;
             if entry.has_configured_embedder {
-                continue; // we don't write vectors with configured embedder in documents
+                if let Some(action) = embedder_actions.get(name) {
+                    if action.write_back().is_some() && !entry.regenerate {
+                        vectors.insert(
+                            name,
+                            serde_json::json!({
+                                "regenerate": entry.regenerate,
+                                // TODO: consider optimizing the shape of embedders here to store an array of f32 rather than a JSON object
+                                "embeddings": entry.embeddings,
+                            }),
+                        );
+                    }
+                }
+            } else {
+                match embedder_actions.get(name) {
+                    Some(action) if action.write_back().is_none() => {
+                        continue;
+                    }
+                    _ => {
+                        vectors.insert(
+                            name,
+                            if entry.implicit {
+                                serde_json::json!(entry.embeddings)
+                            } else {
+                                serde_json::json!({
+                                    "regenerate": entry.regenerate,
+                                    // TODO: consider optimizing the shape of embedders here to store an array of f32 rather than a JSON object
+                                    "embeddings": entry.embeddings,
+                                })
+                            },
+                        );
+                    }
+                }
             }
-            vectors.insert(
-                name,
-                if entry.implicit {
-                    serde_json::json!(entry.embeddings)
-                } else {
-                    serde_json::json!({
-                        "regenerate": entry.regenerate,
-                        // TODO: consider optimizing the shape of embedders here to store an array of f32 rather than a JSON object
-                        "embeddings": entry.embeddings,
-                    })
-                },
-            );
         }
 
         if vectors.is_empty() {
@@ -439,3 +469,231 @@ impl<'doc> Versions<'doc> {
         self.data.get(k)
     }
 }
+
+#[derive(Debug)]
+pub struct KvDelAddDocument<'a, Mapper: FieldIdMapper> {
+    document: &'a obkv::KvReaderU16,
+    side: crate::update::del_add::DelAdd,
+    fields_ids_map: &'a Mapper,
+}
+
+impl<'a, Mapper: FieldIdMapper> KvDelAddDocument<'a, Mapper> {
+    pub fn new(
+        document: &'a obkv::KvReaderU16,
+        side: crate::update::del_add::DelAdd,
+        fields_ids_map: &'a Mapper,
+    ) -> Self {
+        Self { document, side, fields_ids_map }
+    }
+
+    fn get(&self, k: &str) -> Result<Option<&'a RawValue>> {
+        let Some(id) = self.fields_ids_map.id(k) else { return Ok(None) };
+        let Some(value) = self.document.get(id) else { return Ok(None) };
+        let Some(value) = KvReaderDelAdd::from_slice(value).get(self.side) else { return Ok(None) };
+
+        let value = serde_json::from_slice(value).map_err(crate::InternalError::SerdeJson)?;
+
+        Ok(Some(value))
+    }
+}
+
+impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
+    fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'a str, &'a RawValue)>> {
+        let mut it = self.document.iter();
+
+        std::iter::from_fn(move || loop {
+            let (fid, value) = it.next()?;
+            let Some(value) = KvReaderDelAdd::from_slice(value).get(self.side) else {
+                continue;
+            };
+            let name = match self.fields_ids_map.name(fid).ok_or(
+                InternalError::FieldIdMapMissingEntry(crate::FieldIdMapMissingEntry::FieldId {
+                    field_id: fid,
+                    process: "getting current document",
+                }),
+            ) {
+                Ok(name) => name,
+                Err(error) => return Some(Err(error.into())),
+            };
+
+            if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME {
+                continue;
+            }
+
+            let res = (|| {
+                let value =
+                    serde_json::from_slice(value).map_err(crate::InternalError::SerdeJson)?;
+
+                Ok((name, value))
+            })();
+
+            return Some(res);
+        })
+    }
+
+    fn top_level_fields_count(&self) -> usize {
+        let mut it = self.document.iter();
+
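+        // Count only the fields present on this DelAdd side, skipping the
+        // reserved `_vectors`/`_geo` fields (mirrors `iter_top_level_fields`).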
+        std::iter::from_fn(move || loop {
+            let (fid, value) = it.next()?;
+            let Some(_) = KvReaderDelAdd::from_slice(value).get(self.side) else {
+                continue;
+            };
+            let name = match self.fields_ids_map.name(fid).ok_or(
+                InternalError::FieldIdMapMissingEntry(crate::FieldIdMapMissingEntry::FieldId {
+                    field_id: fid,
+                    process: "getting current document",
+                }),
+            ) {
+                Ok(name) => name,
+                Err(_) => return Some(()),
+            };
+
+            if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME {
+                continue;
+            }
+
+            return Some(());
+        })
+        .count()
+    }
+
+    fn top_level_field(&self, k: &str) -> Result<Option<&'a RawValue>> {
+        if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME {
+            return Ok(None);
+        }
+        self.get(k)
+    }
+
+    fn vectors_field(&self) -> Result<Option<&'a RawValue>> {
+        self.get(RESERVED_VECTORS_FIELD_NAME)
+    }
+
+    fn geo_field(&self) -> Result<Option<&'a RawValue>> {
+        self.get(RESERVED_GEO_FIELD_NAME)
+    }
+}
+
+pub struct DocumentIdentifiers<'doc> {
+    docid: DocumentId,
+    external_document_id: &'doc str,
+}
+
+impl<'doc> DocumentIdentifiers<'doc> {
+    pub fn create(docid: DocumentId, external_document_id: &'doc str) -> Self {
+        Self { docid, external_document_id }
+    }
+
+    pub fn docid(&self) -> DocumentId {
+        self.docid
+    }
+
+    pub fn external_document_id(&self) -> &'doc str {
+        self.external_document_id
+    }
+
+    pub fn current<'a, Mapper: FieldIdMapper>(
+        &self,
+        rtxn: &'a RoTxn,
+        index: &'a Index,
+        mapper: &'a Mapper,
+    ) -> Result<DocumentFromDb<'a, Mapper>> {
+        Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper)?.ok_or(
+            crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
+        )?)
+    }
+
+    pub fn current_vectors<'a, Mapper: FieldIdMapper>(
+        &self,
+        rtxn: &'a RoTxn,
+        index: &'a Index,
+        mapper: &'a Mapper,
+        doc_alloc: &'a Bump,
+    ) -> Result<VectorDocumentFromDb<'a>> {
+        Ok(VectorDocumentFromDb::new(self.docid, index, rtxn, mapper, doc_alloc)?.ok_or(
+            crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
+        )?)
+    }
+}
+
+pub struct DocumentContext<
+    'doc,             // covariant lifetime of a single `process` call
+    'extractor: 'doc, // invariant lifetime of the extractor_allocs
+    'fid: 'doc,       // invariant lifetime of the new_fields_ids_map
+    'indexer: 'doc,   // covariant lifetime of objects that outlive a single `process` call
+    T: MostlySend,
+> {
+    /// The index we're indexing in
+    pub index: &'indexer Index,
+    /// The fields ids map as it was at the start of this indexing process. Contains at least all top-level fields from documents
+    /// inside of the DB.
+    pub db_fields_ids_map: &'indexer FieldsIdsMap,
+    /// A transaction providing data from the DB before all indexing operations
+    pub rtxn: RoTxn<'indexer, WithoutTls>,
+
+    /// Global field id map that is up to date with the current state of the indexing process.
+    ///
+    /// - Inserting a field will take a lock
+    /// - Retrieving a field may take a lock as well
+    pub new_fields_ids_map: &'doc std::cell::RefCell<GlobalFieldsIdsMap<'fid>>,
+
+    /// Data allocated in this allocator is cleared between each call to `process`.
+    pub doc_alloc: Bump,
+
+    /// Data allocated in this allocator is not cleared between each call to `process`, unless the data spills.
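+    /// (In practice this is where the per-extractor caches and status deltas live.)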
+    pub extractor_alloc: &'extractor Bump,
+
+    /// Pool of doc allocators, used to retrieve the doc allocator we provided for the documents
+    pub doc_allocs: &'doc ThreadLocal<FullySend<Cell<Bump>>>,
+
+    /// Extractor-specific data
+    pub data: &'doc T,
+}
+
+impl<
+        'doc,             // covariant lifetime of a single `process` call
+        'data: 'doc,      // invariant on T lifetime of the datastore
+        'extractor: 'doc, // invariant lifetime of extractor_allocs
+        'fid: 'doc,       // invariant lifetime of fields ids map
+        'indexer: 'doc,   // covariant lifetime of objects that survive a `process` call
+        T: MostlySend,
+    > DocumentContext<'doc, 'extractor, 'fid, 'indexer, T>
+{
+    #[allow(clippy::too_many_arguments)]
+    pub fn new<F>(
+        index: &'indexer Index,
+        db_fields_ids_map: &'indexer FieldsIdsMap,
+        new_fields_ids_map: &'fid RwLock<FieldIdMapWithMetadata>,
+        extractor_allocs: &'extractor ThreadLocal<FullySend<Bump>>,
+        doc_allocs: &'doc ThreadLocal<FullySend<Cell<Bump>>>,
+        datastore: &'data ThreadLocal<T>,
+        fields_ids_map_store: &'doc ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
+        init_data: F,
+    ) -> Result<Self>
+    where
+        F: FnOnce(&'extractor Bump) -> Result<T>,
+    {
+        let doc_alloc =
+            doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024))));
+        let doc_alloc = doc_alloc.0.take();
+        let fields_ids_map = fields_ids_map_store
+            .get_or(|| RefCell::new(GlobalFieldsIdsMap::new(new_fields_ids_map)).into());
+
+        let fields_ids_map = &fields_ids_map.0;
+        let extractor_alloc = extractor_allocs.get_or_default();
+
+        let data = datastore.get_or_try(move || init_data(&extractor_alloc.0))?;
+
+        let txn = index.read_txn()?;
+        Ok(DocumentContext {
+            index,
+            rtxn: txn,
+            db_fields_ids_map,
+            new_fields_ids_map: fields_ids_map,
+            doc_alloc,
+            extractor_alloc: &extractor_alloc.0,
+            data,
+            doc_allocs,
+        })
+    }
+}
diff --git a/crates/milli/src/update/new/document_change.rs b/crates/milli/src/update/new/document_change.rs
index 8a8ac4bb3..1a40615e7 100644
--- a/crates/milli/src/update/new/document_change.rs
+++ b/crates/milli/src/update/new/document_change.rs
@@ -10,20 +10,16 @@ use super::vector_document::{
 };
 use crate::attribute_patterns::PatternMatch;
 use crate::documents::FieldIdMapper;
-use crate::vector::EmbeddingConfigs;
+use crate::update::new::document::DocumentIdentifiers;
+use crate::vector::RuntimeEmbedders;
 use crate::{DocumentId, Index, InternalError, Result};
 
 pub enum DocumentChange<'doc> {
-    Deletion(Deletion<'doc>),
+    Deletion(DocumentIdentifiers<'doc>),
     Update(Update<'doc>),
     Insertion(Insertion<'doc>),
 }
 
-pub struct Deletion<'doc> {
-    docid: DocumentId,
-    external_document_id: &'doc str,
-}
-
 pub struct Update<'doc> {
     docid: DocumentId,
     external_document_id: &'doc str,
@@ -55,31 +51,6 @@ impl<'doc> DocumentChange<'doc> {
     }
 }
 
-impl<'doc> Deletion<'doc> {
-    pub fn create(docid: DocumentId, external_document_id: &'doc str) -> Self {
-        Self { docid, external_document_id }
-    }
-
-    pub fn docid(&self) -> DocumentId {
-        self.docid
-    }
-
-    pub fn external_document_id(&self) -> &'doc str {
-        self.external_document_id
-    }
-
-    pub fn current<'a, Mapper: FieldIdMapper>(
-        &self,
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-        mapper: &'a Mapper,
-    ) -> Result<DocumentFromDb<'a, Mapper>> {
-        Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper)?.ok_or(
-            crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
-        )?)
-    }
-}
-
 impl<'doc> Insertion<'doc> {
     pub fn create(docid: DocumentId, external_document_id: &'doc str, new: Versions<'doc>) -> Self {
         Insertion { docid, external_document_id, new }
@@ -99,7 +70,7 @@ impl<'doc> Insertion<'doc> {
     pub fn inserted_vectors(
         &self,
         doc_alloc: &'doc Bump,
-        embedders: &'doc EmbeddingConfigs,
+        embedders: &'doc RuntimeEmbedders,
     ) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
         VectorDocumentFromVersions::new(self.external_document_id, &self.new, doc_alloc, embedders)
     }
@@ -270,7 +241,7 @@ impl<'doc> Update<'doc> {
     pub fn only_changed_vectors(
         &self,
         doc_alloc: &'doc Bump,
-        embedders: &'doc EmbeddingConfigs,
+        embedders: &'doc RuntimeEmbedders,
     ) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
         VectorDocumentFromVersions::new(self.external_document_id, &self.new, doc_alloc, embedders)
     }
@@ -281,7 +252,7 @@ impl<'doc> Update<'doc> {
         index: &'doc Index,
         mapper: &'doc Mapper,
         doc_alloc: &'doc Bump,
-        embedders: &'doc EmbeddingConfigs,
+        embedders: &'doc RuntimeEmbedders,
     ) -> Result<Option<MergedVectorDocument<'doc>>> {
         if self.from_scratch {
             MergedVectorDocument::without_db(
diff --git a/crates/milli/src/update/new/extract/documents.rs b/crates/milli/src/update/new/extract/documents.rs
index d1c92919b..31d2ada0f 100644
--- a/crates/milli/src/update/new/extract/documents.rs
+++ b/crates/milli/src/update/new/extract/documents.rs
@@ -1,26 +1,33 @@
 use std::cell::RefCell;
+use std::collections::BTreeMap;
 
 use bumpalo::Bump;
 use hashbrown::HashMap;
 
 use super::DelAddRoaringBitmap;
 use crate::constants::RESERVED_GEO_FIELD_NAME;
-use crate::update::new::channel::DocumentsSender;
-use crate::update::new::document::{write_to_obkv, Document as _};
-use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor};
+use crate::update::new::channel::{DocumentsSender, ExtractorBbqueueSender};
+use crate::update::new::document::{write_to_obkv, Document, DocumentContext, DocumentIdentifiers};
+use crate::update::new::indexer::document_changes::{Extractor, IndexingContext};
+use crate::update::new::indexer::settings_changes::{
+    settings_change_extract, DocumentsIndentifiers, SettingsChangeExtractor,
+};
 use crate::update::new::ref_cell_ext::RefCellExt as _;
-use crate::update::new::thread_local::FullySend;
+use crate::update::new::thread_local::{FullySend, ThreadLocal};
+use crate::update::new::vector_document::VectorDocument;
 use crate::update::new::DocumentChange;
-use crate::vector::EmbeddingConfigs;
+use crate::update::settings::SettingsDelta;
+use crate::vector::settings::EmbedderAction;
+use crate::vector::RuntimeEmbedders;
 use crate::Result;
 
 pub struct DocumentsExtractor<'a, 'b> {
     document_sender: DocumentsSender<'a, 'b>,
-    embedders: &'a EmbeddingConfigs,
+    embedders: &'a RuntimeEmbedders,
 }
 
 impl<'a, 'b> DocumentsExtractor<'a, 'b> {
-    pub fn new(document_sender: DocumentsSender<'a, 'b>, embedders: &'a EmbeddingConfigs) -> Self {
+    pub fn new(document_sender: DocumentsSender<'a, 'b>, embedders: &'a RuntimeEmbedders) -> Self {
         Self { document_sender, embedders }
     }
 }
@@ -41,10 +48,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
     fn process<'doc>(
         &self,
         changes: impl Iterator<Item = Result<Option<DocumentChange<'doc>>>>,
-        context: &DocumentChangeContext<Self::Data>,
+        context: &DocumentContext<Self::Data>,
     ) -> Result<()> {
         let mut document_buffer = bumpalo::collections::Vec::new_in(&context.doc_alloc);
         let mut document_extractor_data = context.data.0.borrow_mut_or_yield();
+        let embedder_actions = &Default::default();
 
         for change in changes {
             let change = change?;
@@ -121,9 +129,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
                     let content = write_to_obkv(
                         &content,
                         vector_content.as_ref(),
+                        embedder_actions,
                         &mut new_fields_ids_map,
                         &mut document_buffer,
                     )?;
+
                     self.document_sender.uncompressed(docid, external_docid, content).unwrap();
                 }
                 DocumentChange::Insertion(insertion) => {
@@ -146,6 +156,7 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
                     let content = write_to_obkv(
                         &content,
                         inserted_vectors.as_ref(),
+                        embedder_actions,
                         &mut new_fields_ids_map,
                         &mut document_buffer,
                     )?;
@@ -158,3 +169,144 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
         Ok(())
     }
 }
+
+pub struct SettingsChangeDocumentExtractor<'a, 'b> {
+    document_sender: DocumentsSender<'a, 'b>,
+    embedder_actions: &'a BTreeMap<String, EmbedderAction>,
+}
+
+impl<'a, 'b> SettingsChangeDocumentExtractor<'a, 'b> {
+    pub fn new(
+        document_sender: DocumentsSender<'a, 'b>,
+        embedder_actions: &'a BTreeMap<String, EmbedderAction>,
+    ) -> Self {
+        Self { document_sender, embedder_actions }
+    }
+}
+
+impl<'extractor> SettingsChangeExtractor<'extractor> for SettingsChangeDocumentExtractor<'_, '_> {
+    type Data = FullySend<()>;
+
+    fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
+        Ok(FullySend(()))
+    }
+
+    fn process<'doc>(
+        &self,
+        documents: impl Iterator<Item = Result<DocumentIdentifiers<'doc>>>,
+        context: &DocumentContext<Self::Data>,
+    ) -> Result<()> {
+        let mut document_buffer = bumpalo::collections::Vec::new_in(&context.doc_alloc);
+
+        for document in documents {
+            let document = document?;
+            // **WARNING**: the exclusive borrow on `new_fields_ids_map` needs to be taken **inside** of the `for change in changes` loop
+            // Otherwise, `BorrowMutError` will occur for document changes that also need the new_fields_ids_map (e.g.: UpdateByFunction)
+            let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield();
+
+            let external_docid = document.external_document_id().to_owned();
+            let content =
+                document.current(&context.rtxn, context.index, &context.db_fields_ids_map)?;
+            let vector_content = document.current_vectors(
+                &context.rtxn,
+                context.index,
+                &context.db_fields_ids_map,
+                &context.doc_alloc,
+            )?;
+
+            // if the document doesn't need to be updated, we skip it
+            if !must_update_document(&vector_content, self.embedder_actions)? {
+                continue;
+            }
+
+            let content = write_to_obkv(
+                &content,
+                Some(&vector_content),
+                self.embedder_actions,
+                &mut new_fields_ids_map,
+                &mut document_buffer,
+            )?;
+
+            self.document_sender.uncompressed(document.docid(), external_docid, content).unwrap();
+        }
+
+        Ok(())
+    }
+}
+
+/// Modify the database documents based on the settings changes.
+///
+/// This function extracts the documents from the database,
+/// modifies them by adding or removing vector fields based on embedder actions,
+/// and then updates the database.
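+/// Nothing is rewritten when `must_update_database` (below) reports that no
+/// embedder action requires a write-back or a removal.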
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents::extract")]
+pub fn update_database_documents<'indexer, 'extractor, MSP, SD>(
+    documents: &'indexer DocumentsIndentifiers<'indexer>,
+    indexing_context: IndexingContext<MSP>,
+    extractor_sender: &ExtractorBbqueueSender,
+    settings_delta: &SD,
+    extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
+) -> Result<()>
+where
+    MSP: Fn() -> bool + Sync,
+    SD: SettingsDelta,
+{
+    if !must_update_database(settings_delta) {
+        return Ok(());
+    }
+
+    let document_sender = extractor_sender.documents();
+    let document_extractor =
+        SettingsChangeDocumentExtractor::new(document_sender, settings_delta.embedder_actions());
+    let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
+
+    settings_change_extract(
+        documents,
+        &document_extractor,
+        indexing_context,
+        extractor_allocs,
+        &datastore,
+        crate::update::new::steps::IndexingStep::ExtractingDocuments,
+    )?;
+
+    Ok(())
+}
+
+fn must_update_database<SD: SettingsDelta>(settings_delta: &SD) -> bool {
+    settings_delta.embedder_actions().iter().any(|(name, action)| {
+        if action.reindex().is_some() {
+            // if the action triggers a reindex, we only need to update the documents
+            // database when the embedder is a new one
+            settings_delta.old_embedders().get(name).is_none()
+        } else {
+            // if the action requires a write-back, we need to update the documents database
+            action.write_back().is_some()
+        }
+    })
+}
+
+fn must_update_document<'s, 'a>(
+    vector_document: &'s impl VectorDocument<'s>,
+    embedder_actions: &'a BTreeMap<String, EmbedderAction>,
+) -> Result<bool>
+where
+    's: 'a,
+{
+    // Check if any vector needs to be written back for the document
+    for (name, action) in embedder_actions {
+        // if the vector entry is not found, we don't need to update the document
+        let Some(vector_entry) = vector_document.vectors_for_key(name)? else {
+            continue;
+        };
+
+        // if the vector entry is user provided, we need to update the document by writing back vectors.
+        let write_back = action.write_back().is_some() && !vector_entry.regenerate;
+        // if the entry belongs to a newly configured embedder, we need to update the document by removing those vectors from it.
+        let new_embedder = action.reindex().is_some() && !vector_entry.has_configured_embedder;
+
+        if write_back || new_embedder {
+            return Ok(true);
+        }
+    }
+
+    Ok(false)
+}
diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs
index 517ef3f2d..6e9ae7ee4 100644
--- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs
+++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs
@@ -15,9 +15,10 @@ use crate::filterable_attributes_rules::match_faceted_field;
 use crate::heed_codec::facet::OrderedF64Codec;
 use crate::update::del_add::DelAdd;
 use crate::update::new::channel::FieldIdDocidFacetSender;
+use crate::update::new::document::DocumentContext;
 use crate::update::new::extract::perm_json_p;
 use crate::update::new::indexer::document_changes::{
-    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
+    extract, DocumentChanges, Extractor, IndexingContext,
 };
 use crate::update::new::ref_cell_ext::RefCellExt as _;
 use crate::update::new::steps::IndexingStep;
@@ -51,7 +52,7 @@ impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'_, '_> {
     fn process<'doc>(
         &self,
         changes: impl Iterator<Item = Result<Option<DocumentChange<'doc>>>>,
-        context: &DocumentChangeContext<Self::Data>,
+        context: &DocumentContext<Self::Data>,
     ) -> Result<()> {
         for change in changes {
             let change = change?;
@@ -75,7 +76,7 @@ pub struct FacetedDocidsExtractor;
 impl FacetedDocidsExtractor {
     #[allow(clippy::too_many_arguments)]
     fn extract_document_change(
-        context: &DocumentChangeContext<RefCell<BalancedCaches>>,
+        context: &DocumentContext<RefCell<BalancedCaches>>,
         filterable_attributes: &[FilterableAttributesRule],
         sortable_fields: &HashSet<String>,
        asc_desc_fields: &HashSet<String>,
diff --git a/crates/milli/src/update/new/extract/faceted/facet_document.rs b/crates/milli/src/update/new/extract/faceted/facet_document.rs
index 359c32e58..d0f088bad 100644
--- a/crates/milli/src/update/new/extract/faceted/facet_document.rs
+++ b/crates/milli/src/update/new/extract/faceted/facet_document.rs
@@ -4,6 +4,7 @@ use serde_json::Value;
 
 use crate::attribute_patterns::PatternMatch;
 use crate::fields_ids_map::metadata::Metadata;
+use crate::filterable_attributes_rules::match_faceted_field;
 use crate::update::new::document::Document;
 use crate::update::new::extract::geo::extract_geo_coordinates;
 use crate::update::new::extract::perm_json_p;
@@ -11,8 +12,6 @@ use crate::{
     FieldId, FilterableAttributesRule, GlobalFieldsIdsMap, InternalError, Result, UserError,
 };
 
-use crate::filterable_attributes_rules::match_faceted_field;
-
 #[allow(clippy::too_many_arguments)]
 pub fn extract_document_facets<'doc>(
     document: impl Document<'doc>,
diff --git a/crates/milli/src/update/new/extract/geo/mod.rs b/crates/milli/src/update/new/extract/geo/mod.rs
index b2ccc1b2b..8e164b48f 100644
--- a/crates/milli/src/update/new/extract/geo/mod.rs
+++ b/crates/milli/src/update/new/extract/geo/mod.rs
@@ -10,8 +10,8 @@ use serde_json::value::RawValue;
 use serde_json::Value;
 
 use crate::error::GeoError;
-use crate::update::new::document::Document;
-use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor};
+use crate::update::new::document::{Document, DocumentContext};
+use crate::update::new::indexer::document_changes::Extractor;
 use crate::update::new::ref_cell_ext::RefCellExt as _;
 use crate::update::new::thread_local::MostlySend;
 use crate::update::new::DocumentChange;
@@ -150,7 +150,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
     fn process<'doc>(
         &'doc self,
         changes: impl Iterator<Item = Result<Option<DocumentChange<'doc>>>>,
-        context: &'doc DocumentChangeContext<Self::Data>,
+        context: &'doc DocumentContext<Self::Data>,
     ) -> Result<()> {
         let rtxn = &context.rtxn;
         let index = context.index;
diff --git a/crates/milli/src/update/new/extract/mod.rs b/crates/milli/src/update/new/extract/mod.rs
index a8264ba4a..05c90d8f8 100644
--- a/crates/milli/src/update/new/extract/mod.rs
+++ b/crates/milli/src/update/new/extract/mod.rs
@@ -12,13 +12,14 @@ pub use documents::*;
 pub use faceted::*;
 pub use geo::*;
 pub use searchable::*;
-pub use vectors::EmbeddingExtractor;
+pub use vectors::{EmbeddingExtractor, SettingsChangeEmbeddingExtractor};
 
 /// TODO move in permissive json pointer
 pub mod perm_json_p {
     use serde_json::{Map, Value};
 
-    use crate::{attribute_patterns::PatternMatch, Result};
+    use crate::attribute_patterns::PatternMatch;
+    use crate::Result;
 
     const SPLIT_SYMBOL: char = '.';
 
     /// Returns `true` if the `selector` match the `key`.
diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs
index 046116939..5daf34ca4 100644
--- a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs
+++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs
@@ -8,10 +8,11 @@ use bumpalo::Bump;
 
 use super::match_searchable_field;
 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
+use crate::update::new::document::DocumentContext;
 use crate::update::new::extract::cache::BalancedCaches;
 use crate::update::new::extract::perm_json_p::contained_in;
 use crate::update::new::indexer::document_changes::{
-    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
+    extract, DocumentChanges, Extractor, IndexingContext,
 };
 use crate::update::new::ref_cell_ext::RefCellExt as _;
 use crate::update::new::steps::IndexingStep;
@@ -226,7 +227,7 @@ impl<'extractor> Extractor<'extractor> for WordDocidsExtractorData<'_> {
     fn process<'doc>(
         &self,
         changes: impl Iterator<Item = Result<Option<DocumentChange<'doc>>>>,
-        context: &DocumentChangeContext<Self::Data>,
+        context: &DocumentContext<Self::Data>,
     ) -> Result<()> {
         for change in changes {
             let change = change?;
@@ -305,7 +306,7 @@ impl WordDocidsExtractors {
     }
 
     fn extract_document_change(
-        context: &DocumentChangeContext<RefCell<Option<WordDocidsCachedSorters>>>,
+        context: &DocumentContext<RefCell<Option<WordDocidsCachedSorters>>>,
         document_tokenizer: &DocumentTokenizer,
         searchable_attributes: Option<&[&str]>,
         document_change: DocumentChange,
diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs
index 3b358800f..c9acb9734 100644
--- a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs
+++ b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs
@@ -7,10 +7,10 @@ use bumpalo::Bump;
 use super::match_searchable_field;
 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
 use crate::proximity::{index_proximity, MAX_DISTANCE};
-use crate::update::new::document::Document;
+use crate::update::new::document::{Document, DocumentContext};
 use crate::update::new::extract::cache::BalancedCaches;
 use crate::update::new::indexer::document_changes::{
-    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
+    extract, DocumentChanges, Extractor, IndexingContext,
 };
 use crate::update::new::ref_cell_ext::RefCellExt as _;
 use crate::update::new::steps::IndexingStep;
@@ -39,7 +39,7 @@ impl<'extractor> Extractor<'extractor> for WordPairProximityDocidsExtractorData<'_> {
     fn process<'doc>(
         &self,
         changes: impl Iterator<Item = Result<Option<DocumentChange<'doc>>>>,
-        context: &DocumentChangeContext<Self::Data>,
+        context: &DocumentContext<Self::Data>,
     ) -> Result<()> {
         for change in changes {
             let change = change?;
@@ -116,7 +116,7 @@ impl WordPairProximityDocidsExtractor {
     // and to store the docids of the documents that have a number of words in a given field
     // equal to or under than MAX_COUNTED_WORDS.
     fn extract_document_change(
-        context: &DocumentChangeContext<RefCell<BalancedCaches>>,
+        context: &DocumentContext<RefCell<BalancedCaches>>,
         document_tokenizer: &DocumentTokenizer,
         searchable_attributes: Option<&[&str]>,
         document_change: DocumentChange,
diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs
index 47bd622ae..4ca68027c 100644
--- a/crates/milli/src/update/new/extract/vectors/mod.rs
+++ b/crates/milli/src/update/new/extract/vectors/mod.rs
@@ -1,44 +1,57 @@
 use std::cell::RefCell;
+use std::fmt::Debug;
 
 use bumpalo::collections::Vec as BVec;
 use bumpalo::Bump;
 use hashbrown::{DefaultHashBuilder, HashMap};
 
-use super::cache::DelAddRoaringBitmap;
 use crate::error::FaultSource;
+use crate::progress::EmbedderStats;
 use crate::prompt::Prompt;
 use crate::update::new::channel::EmbeddingSender;
-use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor};
+use crate::update::new::document::{Document, DocumentContext, DocumentIdentifiers};
+use crate::update::new::indexer::document_changes::Extractor;
+use crate::update::new::indexer::settings_changes::SettingsChangeExtractor;
 use crate::update::new::thread_local::MostlySend;
 use crate::update::new::vector_document::VectorDocument;
 use crate::update::new::DocumentChange;
+use crate::update::settings::SettingsDelta;
+use crate::vector::db::{EmbedderInfo, EmbeddingStatus, EmbeddingStatusDelta};
 use crate::vector::error::{
     EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistributionBump,
 };
-use crate::vector::{Embedder, Embedding, EmbeddingConfigs};
+use crate::vector::extractor::{
+    DocumentTemplateExtractor, Extractor as VectorExtractor, ExtractorDiff,
+    RequestFragmentExtractor,
+};
+use crate::vector::session::{EmbedSession, Input, Metadata, OnEmbed};
+use crate::vector::settings::ReindexAction;
+use crate::vector::{Embedding, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment};
 use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};
 
 pub struct EmbeddingExtractor<'a, 'b> {
-    embedders: &'a EmbeddingConfigs,
+    embedders: &'a RuntimeEmbedders,
     sender: EmbeddingSender<'a, 'b>,
     possible_embedding_mistakes: PossibleEmbeddingMistakes,
+    embedder_stats: &'a EmbedderStats,
     threads: &'a ThreadPoolNoAbort,
 }
 
 impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
     pub fn new(
-        embedders: &'a EmbeddingConfigs,
+        embedders: &'a RuntimeEmbedders,
         sender: EmbeddingSender<'a, 'b>,
         field_distribution: &'a FieldDistribution,
+        embedder_stats: &'a EmbedderStats,
         threads: &'a ThreadPoolNoAbort,
     ) -> Self {
         let possible_embedding_mistakes = PossibleEmbeddingMistakes::new(field_distribution);
-        Self { embedders, sender, threads, possible_embedding_mistakes }
+        Self { embedders, sender, threads, possible_embedding_mistakes, embedder_stats }
     }
 }
 
 pub struct EmbeddingExtractorData<'extractor>(
-    pub HashMap<String, DelAddRoaringBitmap, DefaultHashBuilder, &'extractor Bump>,
+    pub HashMap<String, EmbeddingStatusDelta, DefaultHashBuilder, &'extractor Bump>,
 );
 
 unsafe impl MostlySend for EmbeddingExtractorData<'_> {}
@@ -53,28 +66,28 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
     fn process<'doc>(
         &'doc self,
         changes: impl Iterator<Item = Result<Option<DocumentChange<'doc>>>>,
-        context: &'doc DocumentChangeContext<Self::Data>,
+        context: &'doc DocumentContext<Self::Data>,
     ) -> crate::Result<()> {
         let embedders = self.embedders.inner_as_ref();
         let mut unused_vectors_distribution =
             UnusedVectorsDistributionBump::new_in(&context.doc_alloc);
 
         let mut all_chunks = BVec::with_capacity_in(embedders.len(), &context.doc_alloc);
-        for (embedder_name, (embedder, prompt, _is_quantized)) in embedders {
-            let embedder_id =
-                context.index.embedder_category_id.get(&context.rtxn, embedder_name)?.ok_or_else(
-                    || InternalError::DatabaseMissingEntry {
-                        db_name: "embedder_category_id",
-                        key: None,
-                    },
-                )?;
+        let embedder_db = context.index.embedding_configs();
+        for (embedder_name, runtime) in embedders {
+            let embedder_info = embedder_db
+                .embedder_info(&context.rtxn, embedder_name)?
+                .ok_or_else(|| InternalError::DatabaseMissingEntry {
+                    db_name: "embedder_category_id",
+                    key: None,
+                })?;
             all_chunks.push(Chunks::new(
-                embedder,
-                embedder_id,
+                runtime,
+                embedder_info,
                 embedder_name,
-                prompt,
                 context.data,
                 &self.possible_embedding_mistakes,
+                self.embedder_stats,
                 self.threads,
                 self.sender,
                 &context.doc_alloc,
@@ -86,19 +99,14 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
             match change {
                 DocumentChange::Deletion(deletion) => {
                     // vector deletion is handled by document sender,
-                    // we still need to accomodate deletion from user_provided
+                    // we still need to accommodate deletion from embedding_status
                     for chunks in &mut all_chunks {
-                        // regenerate: true means we delete from user_provided
-                        chunks.set_regenerate(deletion.docid(), true);
+                        let (is_user_provided, must_regenerate) =
+                            chunks.is_user_provided_must_regenerate(deletion.docid());
+                        chunks.clear_status(deletion.docid(), is_user_provided, must_regenerate);
                     }
                 }
                 DocumentChange::Update(update) => {
-                    let old_vectors = update.current_vectors(
-                        &context.rtxn,
-                        context.index,
-                        context.db_fields_ids_map,
-                        &context.doc_alloc,
-                    )?;
                     let new_vectors =
                         update.only_changed_vectors(&context.doc_alloc, self.embedders)?;
 
@@ -107,17 +115,16 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
                     }
 
                     for chunks in &mut all_chunks {
-                        let embedder_name = chunks.embedder_name();
-                        let prompt = chunks.prompt();
+                        let (old_is_user_provided, old_must_regenerate) =
+                            chunks.is_user_provided_must_regenerate(update.docid());
 
-                        let old_vectors = old_vectors.vectors_for_key(embedder_name)?.unwrap();
+                        let embedder_name = chunks.embedder_name();
+
+                        // case where we have a `_vectors` field in the updated document
                         if let Some(new_vectors) = new_vectors.as_ref().and_then(|new_vectors| {
                             new_vectors.vectors_for_key(embedder_name).transpose()
                         }) {
                             let new_vectors = new_vectors?;
-                            if old_vectors.regenerate != new_vectors.regenerate {
-                                chunks.set_regenerate(update.docid(), new_vectors.regenerate);
-                            }
                             // do we have set embeddings?
                             if let Some(embeddings) = new_vectors.embeddings {
                                 chunks.set_vectors(
@@ -129,70 +136,62 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
                                         document_id: update.external_document_id().to_string(),
                                         error: error.to_string(),
                                     })?,
+                                    old_is_user_provided,
+                                    old_must_regenerate,
+                                    new_vectors.regenerate,
                                 )?;
+                            // regenerate if the new `_vectors` field says so.
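+                            // (the regenerate flag is persisted with the embedding status, so
+                            // later settings changes can skip documents that opted out)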
                             } else if new_vectors.regenerate {
-                                let new_rendered = prompt.render_document(
-                                    update.external_document_id(),
-                                    update.current(
-                                        &context.rtxn,
-                                        context.index,
-                                        context.db_fields_ids_map,
-                                    )?,
-                                    context.new_fields_ids_map,
-                                    &context.doc_alloc,
-                                )?;
-                                let old_rendered = prompt.render_document(
-                                    update.external_document_id(),
-                                    update.merged(
-                                        &context.rtxn,
-                                        context.index,
-                                        context.db_fields_ids_map,
-                                    )?,
-                                    context.new_fields_ids_map,
-                                    &context.doc_alloc,
-                                )?;
-                                if new_rendered != old_rendered {
-                                    chunks.set_autogenerated(
-                                        update.docid(),
-                                        update.external_document_id(),
-                                        new_rendered,
-                                        &unused_vectors_distribution,
-                                    )?;
-                                }
-                            }
-                        } else if old_vectors.regenerate {
-                            let old_rendered = prompt.render_document(
-                                update.external_document_id(),
-                                update.current(
+                                let new_document = update.merged(
                                     &context.rtxn,
                                     context.index,
                                     context.db_fields_ids_map,
-                                )?,
-                                context.new_fields_ids_map,
-                                &context.doc_alloc,
-                            )?;
-                            let new_rendered = prompt.render_document(
-                                update.external_document_id(),
-                                update.merged(
+                                )?;
+                                let old_document = update.current(
                                     &context.rtxn,
                                     context.index,
                                     context.db_fields_ids_map,
-                                )?,
-                                context.new_fields_ids_map,
-                                &context.doc_alloc,
-                            )?;
-                            if new_rendered != old_rendered {
-                                chunks.set_autogenerated(
+                                )?;
+                                chunks.update_autogenerated(
                                     update.docid(),
                                     update.external_document_id(),
-                                    new_rendered,
+                                    old_document,
+                                    new_document,
+                                    context.new_fields_ids_map,
                                     &unused_vectors_distribution,
+                                    old_is_user_provided,
+                                    old_must_regenerate,
+                                    true,
                                 )?;
                             }
+                        // no `_vectors` field, so only regenerate if the document is already
+                        // flagged for regeneration in the DB.
+                        } else if old_must_regenerate {
+                            let new_document = update.merged(
+                                &context.rtxn,
+                                context.index,
+                                context.db_fields_ids_map,
+                            )?;
+                            let old_document = update.current(
+                                &context.rtxn,
+                                context.index,
+                                context.db_fields_ids_map,
+                            )?;
+                            chunks.update_autogenerated(
+                                update.docid(),
+                                update.external_document_id(),
+                                old_document,
+                                new_document,
+                                context.new_fields_ids_map,
+                                &unused_vectors_distribution,
+                                old_is_user_provided,
+                                old_must_regenerate,
+                                true,
+                            )?;
                         }
                     }
                 }
                 DocumentChange::Insertion(insertion) => {
+                    let (default_is_user_provided, default_must_regenerate) = (false, true);
                     let new_vectors =
                         insertion.inserted_vectors(&context.doc_alloc, self.embedders)?;
                     if let Some(new_vectors) = &new_vectors {
@@ -201,13 +200,11 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
 
                     for chunks in &mut all_chunks {
                         let embedder_name = chunks.embedder_name();
-                        let prompt = chunks.prompt();
 
                         // if no inserted vectors, then regenerate: true + no embeddings => autogenerate
                         if let Some(new_vectors) = new_vectors.as_ref().and_then(|new_vectors| {
                             new_vectors.vectors_for_key(embedder_name).transpose()
                         }) {
                             let new_vectors = new_vectors?;
-                            chunks.set_regenerate(insertion.docid(), new_vectors.regenerate);
                             if let Some(embeddings) = new_vectors.embeddings {
                                 chunks.set_vectors(
                                     insertion.external_document_id(),
@@ -220,33 +217,36 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
                                             .to_string(),
                                         error: error.to_string(),
                                     })?,
+                                    default_is_user_provided,
+                                    default_must_regenerate,
+                                    new_vectors.regenerate,
                                 )?;
                             } else if new_vectors.regenerate {
-                                let rendered = prompt.render_document(
+                                chunks.insert_autogenerated(
+                                    insertion.docid(),
                                     insertion.external_document_id(),
                                     insertion.inserted(),
                                     context.new_fields_ids_map,
-                                    &context.doc_alloc,
-                                )?;
-                                chunks.set_autogenerated(
-                                    insertion.docid(),
-                                    insertion.external_document_id(),
-                                    rendered,
                                     &unused_vectors_distribution,
+                                    true,
                                 )?;
+                            } else {
+                                chunks.set_status(
+                                    insertion.docid(),
+                                    default_is_user_provided,
+                                    default_must_regenerate,
+                                    false,
+                                    false,
+                                );
                             }
                         } else {
-                            let rendered = prompt.render_document(
+                            chunks.insert_autogenerated(
+                                insertion.docid(),
                                 insertion.external_document_id(),
                                 insertion.inserted(),
                                 context.new_fields_ids_map,
-                                &context.doc_alloc,
-                            )?;
-                            chunks.set_autogenerated(
-                                insertion.docid(),
-                                insertion.external_document_id(),
-                                rendered,
                                 &unused_vectors_distribution,
+                                true,
                             )?;
                         }
                     }
@@ -261,150 +261,254 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
         }
     }
 }
 
-// **Warning**: the destructor of this struct is not normally run, make sure that all its fields:
-// 1. don't have side effects tied to their destructors
-// 2. if allocated, are allocated inside of the bumpalo
-//
-// Currently this is the case as:
-// 1. BVec are inside of the bumpalo
-// 2. All other fields are either trivial (u8) or references.
-struct Chunks<'a, 'b, 'extractor> {
-    texts: BVec<'a, &'a str>,
-    ids: BVec<'a, DocumentId>,
-
-    embedder: &'a Embedder,
-    embedder_id: u8,
-    embedder_name: &'a str,
-    dimensions: usize,
-    prompt: &'a Prompt,
-    possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
-    user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
-    threads: &'a ThreadPoolNoAbort,
+pub struct SettingsChangeEmbeddingExtractor<'a, 'b, SD> {
+    settings_delta: &'a SD,
+    embedder_stats: &'a EmbedderStats,
     sender: EmbeddingSender<'a, 'b>,
-    has_manual_generation: Option<&'a str>,
+    possible_embedding_mistakes: PossibleEmbeddingMistakes,
+    threads: &'a ThreadPoolNoAbort,
 }
 
-impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
+impl<'a, 'b, SD: SettingsDelta> SettingsChangeEmbeddingExtractor<'a, 'b, SD> {
     #[allow(clippy::too_many_arguments)]
     pub fn new(
-        embedder: &'a Embedder,
-        embedder_id: u8,
-        embedder_name: &'a str,
-        prompt: &'a Prompt,
-        user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
-        possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
+        settings_delta: &'a SD,
+        embedder_stats: &'a EmbedderStats,
+        sender: EmbeddingSender<'a, 'b>,
+        field_distribution: &'a FieldDistribution,
         threads: &'a ThreadPoolNoAbort,
-        sender: EmbeddingSender<'a, 'b>,
-        doc_alloc: &'a Bump,
     ) -> Self {
-        let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
-        let texts = BVec::with_capacity_in(capacity, doc_alloc);
-        let ids = BVec::with_capacity_in(capacity, doc_alloc);
-        let dimensions = embedder.dimensions();
-        Self {
-            texts,
-            ids,
-            embedder,
-            prompt,
-            possible_embedding_mistakes,
-            threads,
-            sender,
-            embedder_id,
-            embedder_name,
-            user_provided,
-            has_manual_generation: None,
-            dimensions,
-        }
+        let possible_embedding_mistakes = PossibleEmbeddingMistakes::new(field_distribution);
+        Self { settings_delta, embedder_stats, sender, threads, possible_embedding_mistakes }
+    }
+}
+
+impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
+    for SettingsChangeEmbeddingExtractor<'_, '_, SD>
+{
+    type Data = RefCell<EmbeddingExtractorData<'extractor>>;
+
+    fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
+        Ok(RefCell::new(EmbeddingExtractorData(HashMap::new_in(extractor_alloc))))
     }
 
-    pub fn set_autogenerated(
-        &mut self,
-        docid: DocumentId,
-        external_docid: &'a str,
-        rendered: &'a str,
-        unused_vectors_distribution: &UnusedVectorsDistributionBump,
-    ) -> Result<()> {
-        let is_manual = matches!(&self.embedder, &Embedder::UserProvided(_));
-        if is_manual {
-            self.has_manual_generation.get_or_insert(external_docid);
+    fn process<'doc>(
+        &'doc self,
+        documents: impl Iterator<Item = Result<DocumentIdentifiers<'doc>>>,
+        context: &'doc DocumentContext<Self::Data>,
+    ) -> crate::Result<()> {
+        let embedders = self.settings_delta.new_embedders();
+        let old_embedders = self.settings_delta.old_embedders();
+        let unused_vectors_distribution = UnusedVectorsDistributionBump::new_in(&context.doc_alloc);
+
+        let mut all_chunks = BVec::with_capacity_in(embedders.len(), &context.doc_alloc);
+        let embedder_configs = context.index.embedding_configs();
+        for (embedder_name, action) in self.settings_delta.embedder_actions().iter() {
+            let Some(reindex_action) = action.reindex() else {
+                continue;
+            };
+            let runtime = embedders
+                .get(embedder_name)
+                .expect("A runtime must exist for all reindexed embedders");
+            let embedder_info = embedder_configs
+                .embedder_info(&context.rtxn, embedder_name)?
+                .unwrap_or_else(|| {
+                    // new embedder
+                    EmbedderInfo {
+                        embedder_id: *self
+                            .settings_delta
+                            .new_embedder_category_id()
+                            .get(embedder_name)
+                            .expect(
+                                "An embedder_category_id must exist for all reindexed embedders",
+                            ),
+                        embedding_status: EmbeddingStatus::new(),
+                    }
+                });
+            all_chunks.push((
+                Chunks::new(
+                    runtime,
+                    embedder_info,
+                    embedder_name.as_str(),
+                    context.data,
+                    &self.possible_embedding_mistakes,
+                    self.embedder_stats,
+                    self.threads,
+                    self.sender,
+                    &context.doc_alloc,
+                ),
+                reindex_action,
+            ));
         }
+        for document in documents {
+            let document = document?;
 
-        if self.texts.len() < self.texts.capacity() {
-            self.texts.push(rendered);
-            self.ids.push(docid);
-            return Ok(());
-        }
+            let current_vectors = document.current_vectors(
+                &context.rtxn,
+                context.index,
+                context.db_fields_ids_map,
+                &context.doc_alloc,
+            )?;
 
-        Self::embed_chunks(
-            &mut self.texts,
-            &mut self.ids,
-            self.embedder,
-            self.embedder_id,
-            self.embedder_name,
-            self.possible_embedding_mistakes,
-            unused_vectors_distribution,
-            self.threads,
-            self.sender,
-            self.has_manual_generation.take(),
-        )
-    }
+            for (chunks, reindex_action) in &mut all_chunks {
+                let embedder_name = chunks.embedder_name();
+                let current_vectors = current_vectors.vectors_for_key(embedder_name)?;
+                let (old_is_user_provided, _) =
+                    chunks.is_user_provided_must_regenerate(document.docid());
+                let old_has_fragments = old_embedders
+                    .get(embedder_name)
+                    .map(|embedder| !embedder.fragments().is_empty())
+                    .unwrap_or_default();
 
-    pub fn drain(
-        mut self,
-        unused_vectors_distribution: &UnusedVectorsDistributionBump,
-    ) -> Result<()> {
-        let res = Self::embed_chunks(
-            &mut self.texts,
-            &mut self.ids,
-            self.embedder,
-            self.embedder_id,
-            self.embedder_name,
-            self.possible_embedding_mistakes,
-            unused_vectors_distribution,
-            self.threads,
-            self.sender,
-            self.has_manual_generation,
-        );
-        // optimization: don't run bvec dtors as they only contain bumpalo allocated stuff
-        std::mem::forget(self);
-        res
-    }
-
-    #[allow(clippy::too_many_arguments)]
-    pub fn embed_chunks(
-        texts: &mut BVec<'a, &'a str>,
-        ids: &mut BVec<'a, DocumentId>,
-        embedder: &Embedder,
-        embedder_id: u8,
-        embedder_name: &str,
-        possible_embedding_mistakes: &PossibleEmbeddingMistakes,
-        unused_vectors_distribution: &UnusedVectorsDistributionBump,
-        threads: &ThreadPoolNoAbort,
-        sender: EmbeddingSender<'a, 'b>,
-        has_manual_generation: Option<&'a str>,
-    ) -> Result<()> {
-        if let Some(external_docid) = has_manual_generation {
-            let mut msg = format!(
-                r"While embedding documents for embedder `{embedder_name}`: no vectors provided for document `{}`{}",
-                external_docid,
-                if ids.len() > 1 {
-                    format!(" and at least {} other document(s)", ids.len() - 1)
-                } else {
-                    "".to_string()
+                let new_has_fragments = chunks.has_fragments();
+
+                let fragments_changed = old_has_fragments ^ new_has_fragments;
+
+                // if the vectors for this document have been already provided, we don't need to reindex.
+                let (is_new_embedder, must_regenerate) =
+                    current_vectors.as_ref().map_or((true, true), |vectors| {
+                        (!vectors.has_configured_embedder, vectors.regenerate)
+                    });
+
+                match reindex_action {
+                    ReindexAction::RegeneratePrompts | ReindexAction::RegenerateFragments(_) => {
+                        if !must_regenerate {
+                            continue;
+                        }
+                        // we need to regenerate the prompts for the document
+                        chunks.settings_change_autogenerated(
+                            document.docid(),
+                            document.external_document_id(),
+                            document.current(
+                                &context.rtxn,
+                                context.index,
+                                context.db_fields_ids_map,
+                            )?,
+                            self.settings_delta,
+                            context.new_fields_ids_map,
+                            &unused_vectors_distribution,
+                            old_is_user_provided,
+                            fragments_changed,
+                        )?;
+                    }
+                    ReindexAction::FullReindex => {
+                        // if no inserted vectors, then regenerate: true + no embeddings => autogenerate
+                        if let Some(embeddings) = current_vectors
+                            .and_then(|vectors| vectors.embeddings)
+                            // insert the embeddings only for new embedders
+                            .filter(|_| is_new_embedder)
+                        {
+                            chunks.set_vectors(
+                                document.external_document_id(),
+                                document.docid(),
+                                embeddings.into_vec(&context.doc_alloc, embedder_name).map_err(
+                                    |error| UserError::InvalidVectorsEmbedderConf {
+                                        document_id: document.external_document_id().to_string(),
+                                        error: error.to_string(),
+                                    },
+                                )?,
+                                old_is_user_provided,
+                                true,
+                                must_regenerate,
+                            )?;
+                        } else if must_regenerate {
+                            chunks.settings_change_autogenerated(
+                                document.docid(),
+                                document.external_document_id(),
+                                document.current(
+                                    &context.rtxn,
+                                    context.index,
+                                    context.db_fields_ids_map,
+                                )?,
+                                self.settings_delta,
+                                context.new_fields_ids_map,
+                                &unused_vectors_distribution,
+                                old_is_user_provided,
+                                true,
+                            )?;
+                        } else if is_new_embedder {
+                            chunks.set_status(document.docid(), false, true, false, false);
+                        }
+                    }
                 }
-            );
-
-            msg += &format!("\n- Note: `{embedder_name}` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.{embedder_name}`.");
+            }
+        }
+        for (chunk, _) in all_chunks {
+            chunk.drain(&unused_vectors_distribution)?;
+        }
+
+        Ok(())
+    }
+}
+
+pub struct OnEmbeddingDocumentUpdates<'doc, 'b> {
+    embedder_id: u8,
+    sender: EmbeddingSender<'doc, 'b>,
+    possible_embedding_mistakes: &'doc PossibleEmbeddingMistakes,
+}
+
+impl OnEmbeddingDocumentUpdates<'_, '_> {
+    fn clear_vectors(&self, docid: DocumentId) {
+        self.sender.set_vectors(docid, self.embedder_id, vec![]).unwrap();
+    }
+
+    fn process_embeddings(&mut self, metadata: Metadata<'_>, embeddings: Vec<Embedding>) {
+        self.sender.set_vectors(metadata.docid, self.embedder_id, embeddings).unwrap();
+    }
+}
+
+impl<'doc> OnEmbed<'doc> for OnEmbeddingDocumentUpdates<'doc, '_> {
+    type ErrorMetadata = UnusedVectorsDistributionBump<'doc>;
+    fn process_embedding_response(
+        &mut self,
+        response: crate::vector::session::EmbeddingResponse<'doc>,
+    ) {
+        self.sender
+            .set_vector(
+                response.metadata.docid,
+                self.embedder_id,
+                response.metadata.extractor_id,
+                response.embedding,
+            )
+            .unwrap();
+    }
+    fn process_embedding_error(
+        &mut self,
+        error: crate::vector::hf::EmbedError,
+        embedder_name: &'doc str,
+        unused_vectors_distribution: &UnusedVectorsDistributionBump,
+        metadata: BVec<'doc, Metadata<'doc>>,
+    ) -> crate::Error {
+        if let FaultSource::Bug = error.fault {
+            crate::Error::InternalError(crate::InternalError::VectorEmbeddingError(error.into()))
+        } else {
+            let mut msg = if let
 EmbedErrorKind::ManualEmbed(_) = &error.kind {
+                format!(
+                    r"While embedding documents for embedder `{embedder_name}`: no vectors provided for document `{}`{}
+- Note: `{embedder_name}` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.{embedder_name}`.",
+                    if let Some(first) = metadata.first() { first.external_docid } else { "???" },
+                    if metadata.len() > 1 {
+                        format!(" and at least {} other document(s)", metadata.len() - 1)
+                    } else {
+                        "".to_string()
+                    }
+                )
+            } else {
+                format!(r"While embedding documents for embedder `{embedder_name}`: {error}")
+            };
 
             let mut hint_count = 0;
 
-            for (vector_misspelling, count) in possible_embedding_mistakes.vector_mistakes().take(2)
+            for (vector_misspelling, count) in
+                self.possible_embedding_mistakes.vector_mistakes().take(2)
             {
                 msg += &format!("\n- Hint: try replacing `{vector_misspelling}` by `_vectors` in {count} document(s).");
                 hint_count += 1;
             }
 
-            for (embedder_misspelling, count) in possible_embedding_mistakes
+            for (embedder_misspelling, count) in self
+                .possible_embedding_mistakes
                 .embedder_mistakes_bump(embedder_name, unused_vectors_distribution)
                 .take(2)
             {
@@ -413,107 +517,516 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
             }
 
             if hint_count == 0 {
-                msg += &format!(
-                    "\n- Hint: opt-out for a document with `_vectors.{embedder_name}: null`"
-                );
-            }
-
-            return Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg)));
-        }
-
-        let res = match embedder.embed_index_ref(texts.as_slice(), threads) {
-            Ok(embeddings) => {
-                for (docid, embedding) in ids.into_iter().zip(embeddings) {
-                    sender.set_vector(*docid, embedder_id, embedding).unwrap();
-                }
-                Ok(())
-            }
-            Err(error) => {
-                if let FaultSource::Bug = error.fault {
-                    Err(crate::Error::InternalError(crate::InternalError::VectorEmbeddingError(
-                        error.into(),
-                    )))
-                } else {
-                    let mut msg = format!(
-                        r"While embedding documents for embedder `{embedder_name}`: {error}"
+                if let EmbedErrorKind::ManualEmbed(_) = &error.kind {
+                    msg += &format!(
+                        "\n- Hint: opt-out for a document with `_vectors.{embedder_name}: null`"
                     );
-
-                    if let EmbedErrorKind::ManualEmbed(_) = &error.kind {
-                        msg += &format!("\n- Note: `{embedder_name}` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.{embedder_name}`.");
-                    }
-
-                    let mut hint_count = 0;
-
-                    for (vector_misspelling, count) in
-                        possible_embedding_mistakes.vector_mistakes().take(2)
-                    {
-                        msg += &format!("\n- Hint: try replacing `{vector_misspelling}` by `_vectors` in {count} document(s).");
-                        hint_count += 1;
-                    }
-
-                    for (embedder_misspelling, count) in possible_embedding_mistakes
-                        .embedder_mistakes_bump(embedder_name, unused_vectors_distribution)
-                        .take(2)
-                    {
-                        msg += &format!("\n- Hint: try replacing `_vectors.{embedder_misspelling}` by `_vectors.{embedder_name}` in {count} document(s).");
-                        hint_count += 1;
-                    }
-
-                    if hint_count == 0 {
-                        if let EmbedErrorKind::ManualEmbed(_) = &error.kind {
-                            msg += &format!(
-                                "\n- Hint: opt-out for a document with `_vectors.{embedder_name}: null`"
-                            );
-                        }
-                    }
-
-                    Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg)))
                 }
             }
+
+            crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg))
+        }
+    }
+}
+
+struct Chunks<'a, 'b, 'extractor> {
+    dimensions: usize,
+    status_delta: &'a RefCell<EmbeddingExtractorData<'extractor>>,
+    status: EmbeddingStatus,
+    kind: ChunkType<'a, 'b>,
+}
+
+enum ChunkType<'a, 'b> {
+    DocumentTemplate {
+        document_template: &'a Prompt,
+        session: EmbedSession<'a, OnEmbeddingDocumentUpdates<'a,
 'b>, &'a str>,
+    },
+    Fragments {
+        fragments: &'a [RuntimeFragment],
+        session: EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, serde_json::Value>,
+    },
+}
+
+impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        runtime: &'a RuntimeEmbedder,
+        embedder_info: EmbedderInfo,
+        embedder_name: &'a str,
+        status_delta: &'a RefCell<EmbeddingExtractorData<'extractor>>,
+        possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
+        embedder_stats: &'a EmbedderStats,
+        threads: &'a ThreadPoolNoAbort,
+        sender: EmbeddingSender<'a, 'b>,
+        doc_alloc: &'a Bump,
+    ) -> Self {
+        let embedder = &runtime.embedder;
+        let dimensions = embedder.dimensions();
+
+        let fragments = runtime.fragments();
+        let kind = if fragments.is_empty() {
+            ChunkType::DocumentTemplate {
+                document_template: &runtime.document_template,
+                session: EmbedSession::new(
+                    &runtime.embedder,
+                    embedder_name,
+                    threads,
+                    doc_alloc,
+                    embedder_stats,
+                    OnEmbeddingDocumentUpdates {
+                        embedder_id: embedder_info.embedder_id,
+                        sender,
+                        possible_embedding_mistakes,
+                    },
+                ),
+            }
+        } else {
+            ChunkType::Fragments {
+                fragments,
+                session: EmbedSession::new(
+                    &runtime.embedder,
+                    embedder_name,
+                    threads,
+                    doc_alloc,
+                    embedder_stats,
+                    OnEmbeddingDocumentUpdates {
+                        embedder_id: embedder_info.embedder_id,
+                        sender,
+                        possible_embedding_mistakes,
+                    },
+                ),
+            }
         };
-        texts.clear();
-        ids.clear();
-        res
+
+        Self { dimensions, status: embedder_info.embedding_status, status_delta, kind }
     }
 
-    pub fn prompt(&self) -> &'a Prompt {
-        self.prompt
+    pub fn is_user_provided_must_regenerate(&self, docid: DocumentId) -> (bool, bool) {
+        self.status.is_user_provided_must_regenerate(docid)
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn settings_change_autogenerated<'doc, D: Document<'doc> + Debug, SD: SettingsDelta>(
+        &mut self,
+        docid: DocumentId,
+        external_docid: &'a str,
+        document: D,
+        settings_delta: &SD,
+        fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
+        unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
+        old_is_user_provided: bool,
+        full_reindex: bool,
+    ) -> Result<()>
+    where
+        'a: 'doc,
+    {
+        match &mut self.kind {
+            ChunkType::Fragments { fragments: _, session } => {
+                let doc_alloc = session.doc_alloc();
+
+                if old_is_user_provided | full_reindex {
+                    session.on_embed_mut().clear_vectors(docid);
+                }
+
+                settings_delta.try_for_each_fragment_diff(
+                    session.embedder_name(),
+                    |fragment_diff| {
+                        let extractor = RequestFragmentExtractor::new(fragment_diff.new, doc_alloc)
+                            .ignore_errors();
+                        let old = if full_reindex {
+                            None
+                        } else {
+                            fragment_diff.old.map(|old| {
+                                RequestFragmentExtractor::new(old, doc_alloc).ignore_errors()
+                            })
+                        };
+                        let metadata = Metadata {
+                            docid,
+                            external_docid,
+                            extractor_id: extractor.extractor_id(),
+                        };
+
+                        match extractor.diff_settings(&document, &(), old.as_ref())? {
+                            ExtractorDiff::Removed => {
+                                OnEmbed::process_embedding_response(
+                                    session.on_embed_mut(),
+                                    crate::vector::session::EmbeddingResponse {
+                                        metadata,
+                                        embedding: None,
+                                    },
+                                );
+                            }
+                            ExtractorDiff::Added(input) | ExtractorDiff::Updated(input) => {
+                                session.request_embedding(
+                                    metadata,
+                                    input,
+                                    unused_vectors_distribution,
+                                )?;
+                            }
+                            ExtractorDiff::Unchanged => { /* nothing to do */ }
+                        }
+
+                        Result::Ok(())
+                    },
+                )?;
+                self.set_status(docid, old_is_user_provided, true, false, true);
+            }
+            ChunkType::DocumentTemplate { document_template, session } => {
+                let doc_alloc = session.doc_alloc();
+
+                let old_embedder = settings_delta.old_embedders().get(session.embedder_name());
+                let old_document_template = if full_reindex {
+                    None
+                } else {
+                    old_embedder.as_ref().map(|old_embedder| &old_embedder.document_template)
+                };
+                let extractor =
+                    DocumentTemplateExtractor::new(document_template, doc_alloc, fields_ids_map);
+                let old_extractor = old_document_template.map(|old_document_template| {
+                    DocumentTemplateExtractor::new(old_document_template, doc_alloc, fields_ids_map)
+                });
+                let metadata =
+                    Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
+
+                match extractor.diff_settings(document, &external_docid, old_extractor.as_ref())? {
+                    ExtractorDiff::Removed => {
+                        OnEmbed::process_embedding_response(
+                            session.on_embed_mut(),
+                            crate::vector::session::EmbeddingResponse { metadata, embedding: None },
+                        );
+                    }
+                    ExtractorDiff::Added(input) | ExtractorDiff::Updated(input) => {
+                        session.request_embedding(metadata, input, unused_vectors_distribution)?;
+                    }
+                    ExtractorDiff::Unchanged => { /* do nothing */ }
+                }
+                self.set_status(docid, old_is_user_provided, true, false, true);
+            }
+        }
+        Ok(())
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn update_autogenerated<'doc, OD: Document<'doc> + Debug, ND: Document<'doc> + Debug>(
+        &mut self,
+        docid: DocumentId,
+        external_docid: &'a str,
+        old_document: OD,
+        new_document: ND,
+        new_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
+        unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
+        old_is_user_provided: bool,
+        old_must_regenerate: bool,
+        new_must_regenerate: bool,
+    ) -> Result<()>
+    where
+        'a: 'doc,
+    {
+        match &mut self.kind {
+            ChunkType::DocumentTemplate { document_template, session } => {
+                let doc_alloc = session.doc_alloc();
+                let ex = DocumentTemplateExtractor::new(
+                    document_template,
+                    doc_alloc,
+                    new_fields_ids_map,
+                );
+
+                if old_is_user_provided {
+                    session.on_embed_mut().clear_vectors(docid);
+                }
+
+                update_autogenerated(
+                    docid,
+                    external_docid,
+                    [ex],
+                    old_document,
+                    new_document,
+                    &external_docid,
+                    old_must_regenerate,
+                    session,
+                    unused_vectors_distribution,
+                )?
+            }
+            ChunkType::Fragments { fragments, session } => {
+                let doc_alloc = session.doc_alloc();
+                let extractors = fragments.iter().map(|fragment| {
+                    RequestFragmentExtractor::new(fragment, doc_alloc).ignore_errors()
+                });
+
+                if old_is_user_provided {
+                    session.on_embed_mut().clear_vectors(docid);
+                }
+
+                update_autogenerated(
+                    docid,
+                    external_docid,
+                    extractors,
+                    old_document,
+                    new_document,
+                    &(),
+                    old_must_regenerate,
+                    session,
+                    unused_vectors_distribution,
+                )?
+            }
+        };
+
+        self.set_status(
+            docid,
+            old_is_user_provided,
+            old_must_regenerate,
+            false,
+            new_must_regenerate,
+        );
+
+        Ok(())
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn insert_autogenerated<D: Document<'a> + Debug>(
+        &mut self,
+        docid: DocumentId,
+        external_docid: &'a str,
+        new_document: D,
+        new_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap>,
+        unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
+        new_must_regenerate: bool,
+    ) -> Result<()> {
+        let (default_is_user_provided, default_must_regenerate) = (false, true);
+        self.set_status(
+            docid,
+            default_is_user_provided,
+            default_must_regenerate,
+            false,
+            new_must_regenerate,
+        );
+
+        match &mut self.kind {
+            ChunkType::DocumentTemplate { document_template, session } => {
+                let doc_alloc = session.doc_alloc();
+                let ex = DocumentTemplateExtractor::new(
+                    document_template,
+                    doc_alloc,
+                    new_fields_ids_map,
+                );
+
+                insert_autogenerated(
+                    docid,
+                    external_docid,
+                    [ex],
+                    new_document,
+                    &external_docid,
+                    session,
+                    unused_vectors_distribution,
+                )?;
+            }
+            ChunkType::Fragments { fragments, session } => {
+                let doc_alloc = session.doc_alloc();
+                let extractors = fragments.iter().map(|fragment| {
+                    RequestFragmentExtractor::new(fragment, doc_alloc).ignore_errors()
+                });
+
+                insert_autogenerated(
+                    docid,
+                    external_docid,
+                    extractors,
+                    new_document,
+                    &(),
+                    session,
+                    unused_vectors_distribution,
+                )?;
+            }
+        }
+        Ok(())
+    }
+
+    pub fn drain(self, unused_vectors_distribution: &UnusedVectorsDistributionBump) -> Result<()> {
+        match self.kind {
+            ChunkType::DocumentTemplate { document_template: _, session } => {
+                session.drain(unused_vectors_distribution)?;
+            }
+            ChunkType::Fragments { fragments: _, session } => {
+                session.drain(unused_vectors_distribution)?;
+            }
+        }
+        Ok(())
     }
 
     pub fn embedder_name(&self) -> &'a str {
-        self.embedder_name
-    }
-
-    fn set_regenerate(&self, docid: DocumentId, regenerate: bool) {
-        let mut user_provided = self.user_provided.borrow_mut();
-        let user_provided = user_provided.0.entry_ref(self.embedder_name).or_default();
-        if regenerate {
-            // regenerate == !user_provided
-            user_provided.insert_del_u32(docid);
-        } else {
-            user_provided.insert_add_u32(docid);
+        match &self.kind {
+            ChunkType::DocumentTemplate { document_template: _, session } => {
+                session.embedder_name()
+            }
+            ChunkType::Fragments { fragments: _, session } => session.embedder_name(),
         }
     }
 
-    fn set_vectors(
+    fn set_status(
         &self,
+        docid: DocumentId,
+        old_is_user_provided: bool,
+        old_must_regenerate: bool,
+        new_is_user_provided: bool,
+        new_must_regenerate: bool,
+    ) {
+        if EmbeddingStatusDelta::needs_change(
+            old_is_user_provided,
+            old_must_regenerate,
+            new_is_user_provided,
+            new_must_regenerate,
+        ) {
+            let mut status_delta = self.status_delta.borrow_mut();
+            let status_delta = status_delta.0.entry_ref(self.embedder_name()).or_default();
+            status_delta.push_delta(
+                docid,
+                old_is_user_provided,
+                old_must_regenerate,
+                new_is_user_provided,
+                new_must_regenerate,
+            );
+        }
+    }
+
+    pub fn clear_status(&self, docid: DocumentId, is_user_provided: bool, must_regenerate: bool) {
+        // these values ensure both roaring bitmaps are at 0.
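+        // Like `set_status` above, clearing goes through the thread-local delta map: the
+        // transition is only recorded here, then replayed onto the persistent
+        // `EmbeddingStatus` (via `EmbeddingStatusDelta::apply_to`) at merge time.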
+        if EmbeddingStatusDelta::needs_clear(is_user_provided, must_regenerate) {
+            let mut status_delta = self.status_delta.borrow_mut();
+            let status_delta = status_delta.0.entry_ref(self.embedder_name()).or_default();
+            status_delta.clear_docid(docid, is_user_provided, must_regenerate);
+        }
+    }
+
+    pub fn set_vectors(
+        &mut self,
         external_docid: &'a str,
         docid: DocumentId,
         embeddings: Vec<Embedding>,
+        old_is_user_provided: bool,
+        old_must_regenerate: bool,
+        new_must_regenerate: bool,
     ) -> Result<()> {
+        self.set_status(
+            docid,
+            old_is_user_provided,
+            old_must_regenerate,
+            true,
+            new_must_regenerate,
+        );
         for (embedding_index, embedding) in embeddings.iter().enumerate() {
             if embedding.len() != self.dimensions {
                 return Err(UserError::InvalidIndexingVectorDimensions {
                     expected: self.dimensions,
                     found: embedding.len(),
-                    embedder_name: self.embedder_name.to_string(),
+                    embedder_name: self.embedder_name().to_string(),
                     document_id: external_docid.to_string(),
                     embedding_index,
                 }
                 .into());
             }
         }
 
-        self.sender.set_vectors(docid, self.embedder_id, embeddings).unwrap();
+        match &mut self.kind {
+            ChunkType::DocumentTemplate { document_template: _, session } => {
+                session.on_embed_mut().process_embeddings(
+                    Metadata { docid, external_docid, extractor_id: 0 },
+                    embeddings,
+                );
+            }
+            ChunkType::Fragments { fragments: _, session } => {
+                session.on_embed_mut().process_embeddings(
+                    Metadata { docid, external_docid, extractor_id: 0 },
+                    embeddings,
+                );
+            }
+        }
+
         Ok(())
     }
+
+    fn has_fragments(&self) -> bool {
+        matches!(self.kind, ChunkType::Fragments { .. })
+    }
+}
+
+#[allow(clippy::too_many_arguments)]
+fn update_autogenerated<'doc, 'a: 'doc, 'b, E, OD, ND>(
+    docid: DocumentId,
+    external_docid: &'a str,
+    extractors: impl IntoIterator<Item = E>,
+    old_document: OD,
+    new_document: ND,
+    meta: &E::DocumentMetadata,
+    old_must_regenerate: bool,
+    session: &mut EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, E::Input>,
+    unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
+) -> Result<()>
+where
+    OD: Document<'doc> + Debug,
+    ND: Document<'doc> + Debug,
+    E: VectorExtractor<'a>,
+    E::Input: Input,
+    crate::Error: From<E::Error>,
+{
+    for extractor in extractors {
+        let new_rendered = extractor.extract(&new_document, meta)?;
+        let must_regenerate = if !old_must_regenerate {
+            // we just enabled `regenerate`
+            true
+        } else {
+            let old_rendered = extractor.extract(&old_document, meta);
+
+            if let Ok(old_rendered) = old_rendered {
+                // must regenerate if the rendered changed
+                new_rendered != old_rendered
+            } else {
+                // cannot check previous rendered, better regenerate
+                true
+            }
+        };
+
+        if must_regenerate {
+            let metadata =
+                Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
+
+            if let Some(new_rendered) = new_rendered {
+                session.request_embedding(metadata, new_rendered, unused_vectors_distribution)?
+ } else { + // remove any existing embedding + OnEmbed::process_embedding_response( + session.on_embed_mut(), + crate::vector::session::EmbeddingResponse { metadata, embedding: None }, + ); + } + } + } + + Ok(()) +} + +fn insert_autogenerated<'a, 'b, E, D: Document<'a> + Debug>( + docid: DocumentId, + external_docid: &'a str, + extractors: impl IntoIterator, + new_document: D, + meta: &E::DocumentMetadata, + session: &mut EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, E::Input>, + unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>, +) -> Result<()> +where + E: VectorExtractor<'a>, + E::Input: Input, + crate::Error: From, +{ + for extractor in extractors { + let new_rendered = extractor.extract(&new_document, meta)?; + + if let Some(new_rendered) = new_rendered { + session.request_embedding( + Metadata { docid, external_docid, extractor_id: extractor.extractor_id() }, + new_rendered, + unused_vectors_distribution, + )?; + } + } + + Ok(()) } diff --git a/crates/milli/src/update/new/indexer/document_changes.rs b/crates/milli/src/update/new/indexer/document_changes.rs index 5302c9d05..c88751ee3 100644 --- a/crates/milli/src/update/new/indexer/document_changes.rs +++ b/crates/milli/src/update/new/indexer/document_changes.rs @@ -3,100 +3,18 @@ use std::sync::atomic::Ordering; use std::sync::{Arc, RwLock}; use bumpalo::Bump; -use heed::{RoTxn, WithoutTls}; use rayon::iter::IndexedParallelIterator; use super::super::document_change::DocumentChange; use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; use crate::progress::{AtomicDocumentStep, Progress}; +use crate::update::new::document::DocumentContext; use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _; use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; use crate::update::GrenadParameters; use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result}; -pub struct DocumentChangeContext< - 'doc, // covariant lifetime of a single `process` call - 'extractor: 'doc, // invariant lifetime of the extractor_allocs - 'fid: 'doc, // invariant lifetime of the new_fields_ids_map - 'indexer: 'doc, // covariant lifetime of objects that outlive a single `process` call - T: MostlySend, -> { - /// The index we're indexing in - pub index: &'indexer Index, - /// The fields ids map as it was at the start of this indexing process. Contains at least all top-level fields from documents - /// inside of the DB. - pub db_fields_ids_map: &'indexer FieldsIdsMap, - /// A transaction providing data from the DB before all indexing operations - pub rtxn: RoTxn<'indexer, WithoutTls>, - - /// Global field id map that is up to date with the current state of the indexing process. - /// - /// - Inserting a field will take a lock - /// - Retrieving a field may take a lock as well - pub new_fields_ids_map: &'doc std::cell::RefCell>, - - /// Data allocated in this allocator is cleared between each call to `process`. - pub doc_alloc: Bump, - - /// Data allocated in this allocator is not cleared between each call to `process`, unless the data spills. 
- pub extractor_alloc: &'extractor Bump, - - /// Pool of doc allocators, used to retrieve the doc allocator we provided for the documents - doc_allocs: &'doc ThreadLocal>>, - - /// Extractor-specific data - pub data: &'doc T, -} - -impl< - 'doc, // covariant lifetime of a single `process` call - 'data: 'doc, // invariant on T lifetime of the datastore - 'extractor: 'doc, // invariant lifetime of extractor_allocs - 'fid: 'doc, // invariant lifetime of fields ids map - 'indexer: 'doc, // covariant lifetime of objects that survive a `process` call - T: MostlySend, - > DocumentChangeContext<'doc, 'extractor, 'fid, 'indexer, T> -{ - #[allow(clippy::too_many_arguments)] - pub fn new( - index: &'indexer Index, - db_fields_ids_map: &'indexer FieldsIdsMap, - new_fields_ids_map: &'fid RwLock, - extractor_allocs: &'extractor ThreadLocal>, - doc_allocs: &'doc ThreadLocal>>, - datastore: &'data ThreadLocal, - fields_ids_map_store: &'doc ThreadLocal>>>, - init_data: F, - ) -> Result - where - F: FnOnce(&'extractor Bump) -> Result, - { - let doc_alloc = - doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024)))); - let doc_alloc = doc_alloc.0.take(); - let fields_ids_map = fields_ids_map_store - .get_or(|| RefCell::new(GlobalFieldsIdsMap::new(new_fields_ids_map)).into()); - - let fields_ids_map = &fields_ids_map.0; - let extractor_alloc = extractor_allocs.get_or_default(); - - let data = datastore.get_or_try(move || init_data(&extractor_alloc.0))?; - - let txn = index.read_txn()?; - Ok(DocumentChangeContext { - index, - rtxn: txn, - db_fields_ids_map, - new_fields_ids_map: fields_ids_map, - doc_alloc, - extractor_alloc: &extractor_alloc.0, - data, - doc_allocs, - }) - } -} - /// An internal iterator (i.e. using `foreach`) of `DocumentChange`s pub trait Extractor<'extractor>: Sync { type Data: MostlySend; @@ -106,7 +24,7 @@ pub trait Extractor<'extractor>: Sync { fn process<'doc>( &'doc self, changes: impl Iterator>>, - context: &'doc DocumentChangeContext, + context: &'doc DocumentContext, ) -> Result<()>; } @@ -125,7 +43,7 @@ pub trait DocumentChanges<'pl // lifetime of the underlying payload fn item_to_document_change<'doc, // lifetime of a single `process` call T: MostlySend>( &'doc self, - context: &'doc DocumentChangeContext, + context: &'doc DocumentContext, item: &'doc Self::Item, ) -> Result>> where 'pl: 'doc // the payload must survive the process calls ; @@ -224,7 +142,7 @@ where let pi = document_changes.iter(CHUNK_SIZE); pi.try_arc_for_each_try_init( || { - DocumentChangeContext::new( + DocumentContext::new( index, db_fields_ids_map, new_fields_ids_map, diff --git a/crates/milli/src/update/new/indexer/document_deletion.rs b/crates/milli/src/update/new/indexer/document_deletion.rs index c4a72a2a1..157e20bb0 100644 --- a/crates/milli/src/update/new/indexer/document_deletion.rs +++ b/crates/milli/src/update/new/indexer/document_deletion.rs @@ -4,10 +4,11 @@ use rayon::iter::IndexedParallelIterator; use rayon::slice::ParallelSlice as _; use roaring::RoaringBitmap; -use super::document_changes::{DocumentChangeContext, DocumentChanges}; +use super::document_changes::DocumentChanges; use crate::documents::PrimaryKey; +use crate::update::new::document::DocumentContext; use crate::update::new::thread_local::MostlySend; -use crate::update::new::{Deletion, DocumentChange}; +use crate::update::new::{DocumentChange, DocumentIdentifiers}; use crate::{DocumentId, Result}; #[derive(Default)] @@ -58,7 +59,7 @@ impl<'pl> DocumentChanges<'pl> for DocumentDeletionChanges<'pl> { T: MostlySend, 
>( &'doc self, - context: &'doc DocumentChangeContext, + context: &'doc DocumentContext, docid: &'doc Self::Item, ) -> Result>> where @@ -74,7 +75,10 @@ impl<'pl> DocumentChanges<'pl> for DocumentDeletionChanges<'pl> { let external_document_id = external_document_id.to_bump(&context.doc_alloc); - Ok(Some(DocumentChange::Deletion(Deletion::create(*docid, external_document_id)))) + Ok(Some(DocumentChange::Deletion(DocumentIdentifiers::create( + *docid, + external_document_id, + )))) } fn len(&self) -> usize { @@ -93,9 +97,8 @@ mod test { use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; use crate::index::tests::TempIndex; use crate::progress::Progress; - use crate::update::new::indexer::document_changes::{ - extract, DocumentChangeContext, Extractor, IndexingContext, - }; + use crate::update::new::document::DocumentContext; + use crate::update::new::indexer::document_changes::{extract, Extractor, IndexingContext}; use crate::update::new::indexer::DocumentDeletion; use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::{MostlySend, ThreadLocal}; @@ -125,7 +128,7 @@ mod test { fn process<'doc>( &self, changes: impl Iterator>>, - context: &DocumentChangeContext, + context: &DocumentContext, ) -> crate::Result<()> { for change in changes { let change = change?; diff --git a/crates/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs index ca433c043..98faaf145 100644 --- a/crates/milli/src/update/new/indexer/document_operation.rs +++ b/crates/milli/src/update/new/indexer/document_operation.rs @@ -12,14 +12,14 @@ use serde_json::value::RawValue; use serde_json::Deserializer; use super::super::document_change::DocumentChange; -use super::document_changes::{DocumentChangeContext, DocumentChanges}; +use super::document_changes::DocumentChanges; use super::guess_primary_key::retrieve_or_guess_primary_key; use crate::documents::PrimaryKey; use crate::progress::{AtomicPayloadStep, Progress}; -use crate::update::new::document::Versions; +use crate::update::new::document::{DocumentContext, Versions}; use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::MostlySend; -use crate::update::new::{Deletion, Insertion, Update}; +use crate::update::new::{DocumentIdentifiers, Insertion, Update}; use crate::update::{AvailableIds, IndexDocumentsMethod}; use crate::{DocumentId, Error, FieldsIdsMap, Index, InternalError, Result, UserError}; @@ -411,7 +411,7 @@ impl<'pl> DocumentChanges<'pl> for DocumentOperationChanges<'pl> { fn item_to_document_change<'doc, T: MostlySend + 'doc>( &'doc self, - context: &'doc DocumentChangeContext, + context: &'doc DocumentContext, item: &'doc Self::Item, ) -> Result>> where @@ -577,7 +577,7 @@ impl<'pl> PayloadOperations<'pl> { if self.is_new { Ok(None) } else { - let deletion = Deletion::create(self.docid, external_doc); + let deletion = DocumentIdentifiers::create(self.docid, external_doc); Ok(Some(DocumentChange::Deletion(deletion))) } } diff --git a/crates/milli/src/update/new/indexer/extract.rs b/crates/milli/src/update/new/indexer/extract.rs index bb36ddc37..abfb4d6da 100644 --- a/crates/milli/src/update/new/indexer/extract.rs +++ b/crates/milli/src/update/new/indexer/extract.rs @@ -12,14 +12,18 @@ use super::super::steps::IndexingStep; use super::super::thread_local::{FullySend, ThreadLocal}; use super::super::FacetFieldIdsDelta; use super::document_changes::{extract, DocumentChanges, IndexingContext}; -use 
crate::index::IndexEmbeddingConfig; -use crate::progress::MergingWordCache; +use super::settings_changes::settings_change_extract; +use crate::documents::{FieldIdMapper, PrimaryKey}; +use crate::progress::{EmbedderStats, MergingWordCache}; use crate::proximity::ProximityPrecision; use crate::update::new::extract::EmbeddingExtractor; +use crate::update::new::indexer::settings_changes::DocumentsIndentifiers; use crate::update::new::merger::merge_and_send_rtree; use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases}; -use crate::vector::EmbeddingConfigs; -use crate::{Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder}; +use crate::update::settings::SettingsDelta; +use crate::vector::db::{EmbedderInfo, IndexEmbeddingConfig}; +use crate::vector::RuntimeEmbedders; +use crate::{Index, InternalError, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder}; #[allow(clippy::too_many_arguments)] pub(super) fn extract_all<'pl, 'extractor, DC, MSP>( @@ -27,13 +31,14 @@ pub(super) fn extract_all<'pl, 'extractor, DC, MSP>( indexing_context: IndexingContext, indexer_span: Span, extractor_sender: ExtractorBbqueueSender, - embedders: &EmbeddingConfigs, + embedders: &RuntimeEmbedders, extractor_allocs: &'extractor mut ThreadLocal>, finished_extraction: &AtomicBool, field_distribution: &mut BTreeMap, mut index_embeddings: Vec, document_ids: &mut RoaringBitmap, modified_docids: &mut RoaringBitmap, + embedder_stats: &EmbedderStats, ) -> Result<(FacetFieldIdsDelta, Vec)> where DC: DocumentChanges<'pl>, @@ -245,6 +250,7 @@ where embedders, embedding_sender, field_distribution, + embedder_stats, request_threads(), ); let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); @@ -265,14 +271,19 @@ where let span = tracing::debug_span!(target: "indexing::documents::merge", "vectors"); let _entered = span.enter(); + let embedder_configs = index.embedding_configs(); for config in &mut index_embeddings { + let mut infos = embedder_configs.embedder_info(&rtxn, &config.name)?.unwrap(); + 'data: for data in datastore.iter_mut() { let data = &mut data.get_mut().0; - let Some(deladd) = data.remove(&config.name) else { + let Some(delta) = data.remove(&config.name) else { continue 'data; }; - deladd.apply_to(&mut config.user_provided, modified_docids); + delta.apply_to(&mut infos.embedding_status); } + + extractor_sender.embeddings().embedding_status(&config.name, infos).unwrap(); } } } @@ -312,6 +323,122 @@ where Result::Ok((facet_field_ids_delta, index_embeddings)) } +#[allow(clippy::too_many_arguments)] +pub(super) fn extract_all_settings_changes( + indexing_context: IndexingContext, + indexer_span: Span, + extractor_sender: ExtractorBbqueueSender, + settings_delta: &SD, + extractor_allocs: &mut ThreadLocal>, + finished_extraction: &AtomicBool, + field_distribution: &mut BTreeMap, + mut index_embeddings: Vec, + embedder_stats: &EmbedderStats, +) -> Result> +where + MSP: Fn() -> bool + Sync, + SD: SettingsDelta + Sync, +{ + // Create the list of document ids to extract + let rtxn = indexing_context.index.read_txn()?; + let all_document_ids = + indexing_context.index.documents_ids(&rtxn)?.into_iter().collect::>(); + let primary_key = + primary_key_from_db(indexing_context.index, &rtxn, &indexing_context.db_fields_ids_map)?; + let documents = DocumentsIndentifiers::new(&all_document_ids, primary_key); + + let span = + tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract"); + let _entered = span.enter(); + + update_database_documents( + 
&documents, + indexing_context, + &extractor_sender, + settings_delta, + extractor_allocs, + )?; + + 'vectors: { + if settings_delta.embedder_actions().is_empty() { + break 'vectors; + } + + let embedding_sender = extractor_sender.embeddings(); + + // extract the remaining embeddings + let extractor = SettingsChangeEmbeddingExtractor::new( + settings_delta, + embedder_stats, + embedding_sender, + field_distribution, + request_threads(), + ); + let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); + { + let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors"); + let _entered = span.enter(); + + settings_change_extract( + &documents, + &extractor, + indexing_context, + extractor_allocs, + &datastore, + IndexingStep::ExtractingEmbeddings, + )?; + } + { + let span = tracing::debug_span!(target: "indexing::documents::merge", "vectors"); + let _entered = span.enter(); + + let embedder_configs = indexing_context.index.embedding_configs(); + for config in &mut index_embeddings { + // retrieve infos for existing embedder or create a fresh one + let mut infos = + embedder_configs.embedder_info(&rtxn, &config.name)?.unwrap_or_else(|| { + let embedder_id = + *settings_delta.new_embedder_category_id().get(&config.name).unwrap(); + EmbedderInfo { embedder_id, embedding_status: Default::default() } + }); + + 'data: for data in datastore.iter_mut() { + let data = &mut data.get_mut().0; + let Some(delta) = data.remove(&config.name) else { + continue 'data; + }; + delta.apply_to(&mut infos.embedding_status); + } + + extractor_sender.embeddings().embedding_status(&config.name, infos).unwrap(); + } + } + } + + indexing_context.progress.update_progress(IndexingStep::WaitingForDatabaseWrites); + finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); + + Result::Ok(index_embeddings) +} + +fn primary_key_from_db<'indexer>( + index: &'indexer Index, + rtxn: &'indexer heed::RoTxn<'_>, + fields: &'indexer impl FieldIdMapper, +) -> Result> { + let Some(primary_key) = index.primary_key(rtxn)? 
else { + return Err(InternalError::DatabaseMissingEntry { + db_name: crate::index::db_name::MAIN, + key: Some(crate::index::main_key::PRIMARY_KEY_KEY), + } + .into()); + }; + let Some(primary_key) = PrimaryKey::new(primary_key, fields) else { + unreachable!("Primary key must exist at this point"); + }; + Ok(primary_key) +} + fn request_threads() -> &'static ThreadPoolNoAbort { static REQUEST_THREADS: OnceLock = OnceLock::new(); diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 2ea3c787e..a6ba3a919 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -1,5 +1,6 @@ +use std::collections::BTreeMap; use std::sync::atomic::AtomicBool; -use std::sync::{Once, RwLock}; +use std::sync::{Arc, Once, RwLock}; use std::thread::{self, Builder}; use big_s::S; @@ -19,9 +20,11 @@ use super::steps::IndexingStep; use super::thread_local::ThreadLocal; use crate::documents::PrimaryKey; use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; -use crate::progress::Progress; +use crate::progress::{EmbedderStats, Progress}; +use crate::update::settings::SettingsDelta; use crate::update::GrenadParameters; -use crate::vector::{ArroyWrapper, EmbeddingConfigs}; +use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments}; +use crate::vector::{ArroyWrapper, Embedder, RuntimeEmbedders}; use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort}; pub(crate) mod de; @@ -32,6 +35,7 @@ mod extract; mod guess_primary_key; mod partial_dump; mod post_processing; +pub mod settings_changes; mod update_by_function; mod write; @@ -40,8 +44,6 @@ static LOG_MEMORY_METRICS_ONCE: Once = Once::new(); /// This is the main function of this crate. /// /// Give it the output of the [`Indexer::document_changes`] method and it will execute it in the [`rayon::ThreadPool`]. -/// -/// TODO return stats #[allow(clippy::too_many_arguments)] // clippy: 😝 pub fn index<'pl, 'indexer, 'index, DC, MSP>( wtxn: &mut RwTxn, @@ -52,9 +54,10 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP>( new_fields_ids_map: FieldsIdsMap, new_primary_key: Option>, document_changes: &DC, - embedders: EmbeddingConfigs, + embedders: RuntimeEmbedders, must_stop_processing: &'indexer MSP, progress: &'indexer Progress, + embedder_stats: &'indexer EmbedderStats, ) -> Result where DC: DocumentChanges<'pl>, @@ -65,48 +68,8 @@ where let arroy_memory = grenad_parameters.max_memory; - // We reduce the actual memory used to 5%. The reason we do this here and not in Meilisearch - // is because we still use the old indexer for the settings and it is highly impacted by the - // max memory. So we keep the changes here and will remove these changes once we use the new - // indexer to also index settings. Related to #5125 and #5141. 
- let grenad_parameters = GrenadParameters { - max_memory: grenad_parameters.max_memory.map(|mm| mm * 5 / 100), - ..grenad_parameters - }; - - // 5% percent of the allocated memory for the extractors, or min 100MiB - // 5% percent of the allocated memory for the bbqueues, or min 50MiB - // - // Minimum capacity for bbqueues - let minimum_total_bbbuffer_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB - let minimum_total_extractors_capacity = minimum_total_bbbuffer_capacity * 2; - - let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or( - ( - GrenadParameters { - max_memory: Some(minimum_total_extractors_capacity), - ..grenad_parameters - }, - minimum_total_bbbuffer_capacity, - ), // 100 MiB by thread by default - |max_memory| { - let total_bbbuffer_capacity = max_memory.max(minimum_total_bbbuffer_capacity); - let new_grenad_parameters = GrenadParameters { - max_memory: Some(max_memory.max(minimum_total_extractors_capacity)), - ..grenad_parameters - }; - (new_grenad_parameters, total_bbbuffer_capacity) - }, - ); - - LOG_MEMORY_METRICS_ONCE.call_once(|| { - tracing::debug!( - "Indexation allocated memory metrics - \ - Total BBQueue size: {total_bbbuffer_capacity}, \ - Total extractor memory: {:?}", - grenad_parameters.max_memory, - ); - }); + let (grenad_parameters, total_bbbuffer_capacity) = + indexer_memory_settings(pool.current_num_threads(), grenad_parameters); let (extractor_sender, writer_receiver) = pool .install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000)) @@ -130,7 +93,7 @@ where grenad_parameters: &grenad_parameters, }; - let index_embeddings = index.embedding_configs(wtxn)?; + let index_embeddings = index.embedding_configs().embedding_configs(wtxn)?; let mut field_distribution = index.field_distribution(wtxn)?; let mut document_ids = index.documents_ids(wtxn)?; let mut modified_docids = roaring::RoaringBitmap::new(); @@ -158,6 +121,7 @@ where index_embeddings, document_ids, modified_docids, + embedder_stats, ) }) .unwrap() @@ -169,20 +133,21 @@ where let arroy_writers: Result> = embedders .inner_as_ref() .iter() - .map(|(embedder_name, (embedder, _, was_quantized))| { - let embedder_index = index.embedder_category_id.get(wtxn, embedder_name)?.ok_or( - InternalError::DatabaseMissingEntry { + .map(|(embedder_name, runtime)| { + let embedder_index = index + .embedding_configs() + .embedder_id(wtxn, embedder_name)? 
+ .ok_or(InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None, - }, - )?; + })?; - let dimensions = embedder.dimensions(); - let writer = ArroyWrapper::new(vector_arroy, embedder_index, *was_quantized); + let dimensions = runtime.embedder.dimensions(); + let writer = ArroyWrapper::new(vector_arroy, embedder_index, runtime.is_quantized); Ok(( embedder_index, - (embedder_name.as_str(), embedder.as_ref(), writer, dimensions), + (embedder_name.as_str(), &*runtime.embedder, writer, dimensions), )) }) .collect(); @@ -206,6 +171,7 @@ where index_embeddings, arroy_memory, &mut arroy_writers, + None, &indexing_context.must_stop_processing, ) }) @@ -239,3 +205,275 @@ where Ok(congestion) } + +#[allow(clippy::too_many_arguments)] +pub fn reindex<'indexer, 'index, MSP, SD>( + wtxn: &mut RwTxn<'index>, + index: &'index Index, + pool: &ThreadPoolNoAbort, + grenad_parameters: GrenadParameters, + settings_delta: &'indexer SD, + must_stop_processing: &'indexer MSP, + progress: &'indexer Progress, + embedder_stats: Arc, +) -> Result +where + MSP: Fn() -> bool + Sync, + SD: SettingsDelta + Sync, +{ + delete_old_embedders_and_fragments(wtxn, index, settings_delta)?; + + let mut bbbuffers = Vec::new(); + let finished_extraction = AtomicBool::new(false); + + let arroy_memory = grenad_parameters.max_memory; + + let (grenad_parameters, total_bbbuffer_capacity) = + indexer_memory_settings(pool.current_num_threads(), grenad_parameters); + + let (extractor_sender, writer_receiver) = pool + .install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000)) + .unwrap(); + + let mut extractor_allocs = ThreadLocal::with_capacity(rayon::current_num_threads()); + + let db_fields_ids_map = index.fields_ids_map(wtxn)?; + let new_fields_ids_map = settings_delta.new_fields_ids_map().clone(); + let new_fields_ids_map = RwLock::new(new_fields_ids_map); + let fields_ids_map_store = ThreadLocal::with_capacity(rayon::current_num_threads()); + let doc_allocs = ThreadLocal::with_capacity(rayon::current_num_threads()); + + let indexing_context = IndexingContext { + index, + db_fields_ids_map: &db_fields_ids_map, + new_fields_ids_map: &new_fields_ids_map, + doc_allocs: &doc_allocs, + fields_ids_map_store: &fields_ids_map_store, + must_stop_processing, + progress, + grenad_parameters: &grenad_parameters, + }; + + let index_embeddings = index.embedding_configs().embedding_configs(wtxn)?; + let mut field_distribution = index.field_distribution(wtxn)?; + + let congestion = thread::scope(|s| -> Result { + let indexer_span = tracing::Span::current(); + let finished_extraction = &finished_extraction; + // prevent moving the field_distribution and document_ids in the inner closure... 
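+        // (the `spawn_scoped` closure below is a `move` closure: shadowing the binding
+        // with a `&mut` reborrow moves only the reference into the extractor thread, so
+        // the data itself remains usable after the scope ends)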
+ let field_distribution = &mut field_distribution; + let extractor_handle = + Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { + pool.install(move || { + extract::extract_all_settings_changes( + indexing_context, + indexer_span, + extractor_sender, + settings_delta, + &mut extractor_allocs, + finished_extraction, + field_distribution, + index_embeddings, + &embedder_stats, + ) + }) + .unwrap() + })?; + + let new_embedders = settings_delta.new_embedders(); + let embedder_actions = settings_delta.embedder_actions(); + let index_embedder_category_ids = settings_delta.new_embedder_category_id(); + let mut arroy_writers = arroy_writers_from_embedder_actions( + index, + embedder_actions, + new_embedders, + index_embedder_category_ids, + )?; + + let congestion = + write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?; + + indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors); + + let index_embeddings = extractor_handle.join().unwrap()?; + + indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase); + + pool.install(|| { + build_vectors( + index, + wtxn, + indexing_context.progress, + index_embeddings, + arroy_memory, + &mut arroy_writers, + Some(embedder_actions), + &indexing_context.must_stop_processing, + ) + }) + .unwrap()?; + + indexing_context.progress.update_progress(IndexingStep::Finalizing); + + Ok(congestion) as Result<_> + })?; + + // required to into_inner the new_fields_ids_map + drop(fields_ids_map_store); + + let new_fields_ids_map = new_fields_ids_map.into_inner().unwrap(); + let document_ids = index.documents_ids(wtxn)?; + update_index( + index, + wtxn, + new_fields_ids_map, + None, + settings_delta.new_embedders().clone(), + field_distribution, + document_ids, + )?; + + Ok(congestion) +} + +fn arroy_writers_from_embedder_actions<'indexer>( + index: &Index, + embedder_actions: &'indexer BTreeMap, + embedders: &'indexer RuntimeEmbedders, + index_embedder_category_ids: &'indexer std::collections::HashMap, +) -> Result> { + let vector_arroy = index.vector_arroy; + + embedders + .inner_as_ref() + .iter() + .filter_map(|(embedder_name, runtime)| match embedder_actions.get(embedder_name) { + None => None, + Some(action) if action.write_back().is_some() => None, + Some(action) => { + let Some(&embedder_category_id) = index_embedder_category_ids.get(embedder_name) + else { + return Some(Err(crate::error::Error::InternalError( + crate::InternalError::DatabaseMissingEntry { + db_name: crate::index::db_name::VECTOR_EMBEDDER_CATEGORY_ID, + key: None, + }, + ))); + }; + let writer = + ArroyWrapper::new(vector_arroy, embedder_category_id, action.was_quantized); + let dimensions = runtime.embedder.dimensions(); + Some(Ok(( + embedder_category_id, + (embedder_name.as_str(), runtime.embedder.as_ref(), writer, dimensions), + ))) + } + }) + .collect() +} + +fn delete_old_embedders_and_fragments( + wtxn: &mut RwTxn<'_>, + index: &Index, + settings_delta: &SD, +) -> Result<()> +where + SD: SettingsDelta, +{ + for action in settings_delta.embedder_actions().values() { + let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else { + continue; + }; + let reader = ArroyWrapper::new(index.vector_arroy, *embedder_id, action.was_quantized); + let Some(dimensions) = reader.dimensions(wtxn)? 
else { + continue; + }; + reader.clear(wtxn, dimensions)?; + } + + // remove all vectors for the specified fragments + for (embedder_name, RemoveFragments { fragment_ids }, was_quantized) in + settings_delta.embedder_actions().iter().filter_map(|(name, action)| { + action.remove_fragments().map(|fragments| (name, fragments, action.was_quantized)) + }) + { + let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else { + continue; + }; + let arroy = ArroyWrapper::new(index.vector_arroy, infos.embedder_id, was_quantized); + let Some(dimensions) = arroy.dimensions(wtxn)? else { + continue; + }; + for fragment_id in fragment_ids { + // we must keep the user provided embeddings that ended up in this store + + if infos.embedding_status.user_provided_docids().is_empty() { + // no user provided: clear store + arroy.clear_store(wtxn, *fragment_id, dimensions)?; + continue; + } + + // some user provided, remove only the ids that are not user provided + let to_delete = arroy.items_in_store(wtxn, *fragment_id, |items| { + items - infos.embedding_status.user_provided_docids() + })?; + + for to_delete in to_delete { + arroy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?; + } + } + } + + Ok(()) +} + +fn indexer_memory_settings( + current_num_threads: usize, + grenad_parameters: GrenadParameters, +) -> (GrenadParameters, usize) { + // We reduce the actual memory used to 5%. The reason we do this here and not in Meilisearch + // is because we still use the old indexer for the settings and it is highly impacted by the + // max memory. So we keep the changes here and will remove these changes once we use the new + // indexer to also index settings. Related to #5125 and #5141. + let grenad_parameters = GrenadParameters { + max_memory: grenad_parameters.max_memory.map(|mm| mm * 5 / 100), + ..grenad_parameters + }; + + // 5% percent of the allocated memory for the extractors, or min 100MiB + // 5% percent of the allocated memory for the bbqueues, or min 50MiB + // + // Minimum capacity for bbqueues + let minimum_total_bbbuffer_capacity = 50 * 1024 * 1024 * current_num_threads; + // 50 MiB + let minimum_total_extractors_capacity = minimum_total_bbbuffer_capacity * 2; + + let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or( + ( + GrenadParameters { + max_memory: Some(minimum_total_extractors_capacity), + ..grenad_parameters + }, + minimum_total_bbbuffer_capacity, + ), // 100 MiB by thread by default + |max_memory| { + let total_bbbuffer_capacity = max_memory.max(minimum_total_bbbuffer_capacity); + let new_grenad_parameters = GrenadParameters { + max_memory: Some(max_memory.max(minimum_total_extractors_capacity)), + ..grenad_parameters + }; + (new_grenad_parameters, total_bbbuffer_capacity) + }, + ); + + LOG_MEMORY_METRICS_ONCE.call_once(|| { + tracing::debug!( + "Indexation allocated memory metrics - \ + Total BBQueue size: {total_bbbuffer_capacity}, \ + Total extractor memory: {:?}", + grenad_parameters.max_memory, + ); + }); + + (grenad_parameters, total_bbbuffer_capacity) +} diff --git a/crates/milli/src/update/new/indexer/partial_dump.rs b/crates/milli/src/update/new/indexer/partial_dump.rs index 6e4abd898..33e72f532 100644 --- a/crates/milli/src/update/new/indexer/partial_dump.rs +++ b/crates/milli/src/update/new/indexer/partial_dump.rs @@ -5,10 +5,10 @@ use rayon::iter::IndexedParallelIterator; use rustc_hash::FxBuildHasher; use serde_json::value::RawValue; -use super::document_changes::{DocumentChangeContext, DocumentChanges}; +use 
super::document_changes::DocumentChanges; use crate::documents::PrimaryKey; use crate::update::concurrent_available_ids::ConcurrentAvailableIds; -use crate::update::new::document::Versions; +use crate::update::new::document::{DocumentContext, Versions}; use crate::update::new::ref_cell_ext::RefCellExt as _; use crate::update::new::thread_local::MostlySend; use crate::update::new::{DocumentChange, Insertion}; @@ -55,7 +55,7 @@ where fn item_to_document_change<'doc, T: MostlySend + 'doc>( &'doc self, - context: &'doc DocumentChangeContext, + context: &'doc DocumentContext, document: &'doc Self::Item, ) -> Result>> where diff --git a/crates/milli/src/update/new/indexer/post_processing.rs b/crates/milli/src/update/new/indexer/post_processing.rs index b5c89d0d9..288b9c5ed 100644 --- a/crates/milli/src/update/new/indexer/post_processing.rs +++ b/crates/milli/src/update/new/indexer/post_processing.rs @@ -131,7 +131,12 @@ fn compute_word_fst( } } -pub fn recompute_word_fst_from_word_docids_database(index: &Index, wtxn: &mut RwTxn) -> Result<()> { +pub fn recompute_word_fst_from_word_docids_database( + index: &Index, + wtxn: &mut RwTxn, + progress: &Progress, +) -> Result<()> { + progress.update_progress(PostProcessingWords::WordFst); let fst = fst::Set::default().map_data(std::borrow::Cow::Owned)?; let mut word_fst_builder = WordFstBuilder::new(&fst)?; let words = index.word_docids.iter(wtxn)?.remap_data_type::(); diff --git a/crates/milli/src/update/new/indexer/settings_changes.rs b/crates/milli/src/update/new/indexer/settings_changes.rs new file mode 100644 index 000000000..984ab3a0b --- /dev/null +++ b/crates/milli/src/update/new/indexer/settings_changes.rs @@ -0,0 +1,146 @@ +use std::sync::atomic::Ordering; +use std::sync::Arc; + +use bumpalo::Bump; +use rayon::iter::IndexedParallelIterator; +use rayon::slice::ParallelSlice; + +use super::document_changes::IndexingContext; +use crate::documents::PrimaryKey; +use crate::progress::AtomicDocumentStep; +use crate::update::new::document::{DocumentContext, DocumentIdentifiers}; +use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _; +use crate::update::new::steps::IndexingStep; +use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; +use crate::{DocumentId, InternalError, Result}; + +/// An internal iterator (i.e. 
using `foreach`) of `DocumentIdentifiers`
+pub trait SettingsChangeExtractor<'extractor>: Sync {
+    type Data: MostlySend;
+
+    fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> Result<Self::Data>;
+
+    fn process<'doc>(
+        &'doc self,
+        documents: impl Iterator<Item = Result<DocumentIdentifiers<'doc>>>,
+        context: &'doc DocumentContext<Self::Data>,
+    ) -> Result<()>;
+}
+pub struct DocumentsIndentifiers<'indexer> {
+    documents: &'indexer [DocumentId],
+    primary_key: PrimaryKey<'indexer>,
+}
+
+impl<'indexer> DocumentsIndentifiers<'indexer> {
+    pub fn new(documents: &'indexer [DocumentId], primary_key: PrimaryKey<'indexer>) -> Self {
+        Self { documents, primary_key }
+    }
+
+    fn iter(&self, chunk_size: usize) -> impl IndexedParallelIterator<Item = &[DocumentId]> {
+        self.documents.par_chunks(chunk_size)
+    }
+
+    fn item_to_database_document<
+        'doc, // lifetime of a single `process` call
+        T: MostlySend,
+    >(
+        &'doc self,
+        context: &'doc DocumentContext<T>,
+        docid: &'doc DocumentId,
+    ) -> Result<Option<DocumentIdentifiers<'doc>>> {
+        let current = context.index.document(&context.rtxn, *docid)?;
+
+        let external_document_id = self.primary_key.extract_docid_from_db(
+            current,
+            &context.db_fields_ids_map,
+            &context.doc_alloc,
+        )?;
+
+        let external_document_id = external_document_id.to_bump(&context.doc_alloc);
+
+        Ok(Some(DocumentIdentifiers::create(*docid, external_document_id)))
+    }
+
+    fn len(&self) -> usize {
+        self.documents.len()
+    }
+}
+
+const CHUNK_SIZE: usize = 100;
+
+pub fn settings_change_extract<
+    'extractor, // invariant lifetime of extractor_alloc
+    'fid, // invariant lifetime of fields ids map
+    'indexer, // covariant lifetime of objects that are borrowed during the entire indexing
+    'data, // invariant on EX::Data lifetime of datastore
+    'index, // covariant lifetime of the index
+    EX: SettingsChangeExtractor<'extractor>,
+    MSP: Fn() -> bool + Sync,
+>(
+    documents: &'indexer DocumentsIndentifiers<'indexer>,
+    extractor: &EX,
+    IndexingContext {
+        index,
+        db_fields_ids_map,
+        new_fields_ids_map,
+        doc_allocs,
+        fields_ids_map_store,
+        must_stop_processing,
+        progress,
+        grenad_parameters: _,
+    }: IndexingContext<'fid, 'indexer, 'index, MSP>,
+    extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
+    datastore: &'data ThreadLocal<EX::Data>,
+    step: IndexingStep,
+) -> Result<()> {
+    tracing::trace!("We are resetting the extractor allocators");
+    progress.update_progress(step);
+    // Clean up and reuse the extractor allocs
+    for extractor_alloc in extractor_allocs.iter_mut() {
+        tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes());
+        extractor_alloc.0.reset();
+    }
+
+    let total_documents = documents.len() as u32;
+    let (step, progress_step) = AtomicDocumentStep::new(total_documents);
+    progress.update_progress(progress_step);
+
+    let pi = documents.iter(CHUNK_SIZE);
+    pi.try_arc_for_each_try_init(
+        || {
+            DocumentContext::new(
+                index,
+                db_fields_ids_map,
+                new_fields_ids_map,
+                extractor_allocs,
+                doc_allocs,
+                datastore,
+                fields_ids_map_store,
+                move |index_alloc| extractor.init_data(index_alloc),
+            )
+        },
+        |context, items| {
+            if (must_stop_processing)() {
+                return Err(Arc::new(InternalError::AbortedIndexation.into()));
+            }
+
+            // Clean up and reuse the document-specific allocator
+            context.doc_alloc.reset();
+
+            let documents = items
+                .iter()
+                .filter_map(|item| documents.item_to_database_document(context, item).transpose());
+
+            let res = extractor.process(documents, context).map_err(Arc::new);
+            step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);
+
+            // send back the doc_alloc in the pool
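+            // (handing the allocator back matters: a `Bump` keeps its allocated capacity
+            // across `reset`, so the next chunk processed on this thread reuses the same
+            // buffer instead of growing a fresh one)
+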
context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc)); + + res + }, + )?; + step.store(total_documents, Ordering::Relaxed); + + Ok(()) +} diff --git a/crates/milli/src/update/new/indexer/update_by_function.rs b/crates/milli/src/update/new/indexer/update_by_function.rs index 3001648e6..daffe42ed 100644 --- a/crates/milli/src/update/new/indexer/update_by_function.rs +++ b/crates/milli/src/update/new/indexer/update_by_function.rs @@ -5,15 +5,14 @@ use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST}; use roaring::RoaringBitmap; use rustc_hash::FxBuildHasher; -use super::document_changes::DocumentChangeContext; use super::DocumentChanges; use crate::documents::Error::InvalidDocumentFormat; use crate::documents::PrimaryKey; use crate::error::{FieldIdMapMissingEntry, InternalError}; -use crate::update::new::document::Versions; +use crate::update::new::document::{DocumentContext, Versions}; use crate::update::new::ref_cell_ext::RefCellExt as _; use crate::update::new::thread_local::MostlySend; -use crate::update::new::{Deletion, DocumentChange, KvReaderFieldId, Update}; +use crate::update::new::{DocumentChange, DocumentIdentifiers, KvReaderFieldId, Update}; use crate::{all_obkv_to_json, Error, FieldsIdsMap, Object, Result, UserError}; pub struct UpdateByFunction { @@ -86,13 +85,13 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> { fn item_to_document_change<'doc, T: MostlySend + 'doc>( &self, - context: &'doc DocumentChangeContext, + context: &'doc DocumentContext, docid: &'doc Self::Item, ) -> Result>> where 'index: 'doc, { - let DocumentChangeContext { + let DocumentContext { index, db_fields_ids_map, rtxn: txn, @@ -128,10 +127,9 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> { match scope.remove::("doc") { // If the "doc" variable has been set to (), we effectively delete the document. - Some(doc) if doc.is_unit() => Ok(Some(DocumentChange::Deletion(Deletion::create( - docid, - doc_alloc.alloc_str(&document_id), - )))), + Some(doc) if doc.is_unit() => Ok(Some(DocumentChange::Deletion( + DocumentIdentifiers::create(docid, doc_alloc.alloc_str(&document_id)), + ))), None => unreachable!("missing doc variable from the Rhai scope"), Some(new_document) => match new_document.try_cast() { Some(new_rhai_document) => { diff --git a/crates/milli/src/update/new/indexer/write.rs b/crates/milli/src/update/new/indexer/write.rs index 5a600eeb3..b8e3685f8 100644 --- a/crates/milli/src/update/new/indexer/write.rs +++ b/crates/milli/src/update/new/indexer/write.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeMap; use std::sync::atomic::AtomicBool; use bstr::ByteSlice as _; @@ -10,10 +11,11 @@ use super::super::channel::*; use crate::database_stats::DatabaseStats; use crate::documents::PrimaryKey; use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; -use crate::index::IndexEmbeddingConfig; use crate::progress::Progress; use crate::update::settings::InnerIndexSettings; -use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings}; +use crate::vector::db::IndexEmbeddingConfig; +use crate::vector::settings::EmbedderAction; +use crate::vector::{ArroyWrapper, Embedder, Embeddings, RuntimeEmbedders}; use crate::{Error, Index, InternalError, Result, UserError}; pub fn write_to_db( @@ -62,6 +64,14 @@ pub fn write_to_db( writer.del_items(wtxn, *dimensions, docid)?; writer.add_items(wtxn, docid, &embeddings)?; } + ReceiverAction::LargeVector( + large_vector @ LargeVector { docid, embedder_id, extractor_id, .. 
}, + ) => { + let (_, _, writer, dimensions) = + arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + let embedding = large_vector.read_embedding(*dimensions); + writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?; + } } // Every time the is a message in the channel we search @@ -99,6 +109,7 @@ impl ChannelCongestion { } #[tracing::instrument(level = "debug", skip_all, target = "indexing::vectors")] +#[allow(clippy::too_many_arguments)] pub fn build_vectors( index: &Index, wtxn: &mut RwTxn<'_>, @@ -106,6 +117,7 @@ pub fn build_vectors( index_embeddings: Vec, arroy_memory: Option, arroy_writers: &mut HashMap, + embeder_actions: Option<&BTreeMap>, must_stop_processing: &MSP, ) -> Result<()> where @@ -117,20 +129,23 @@ where let seed = rand::random(); let mut rng = rand::rngs::StdRng::seed_from_u64(seed); - for (_index, (_embedder_name, _embedder, writer, dimensions)) in arroy_writers { + for (_index, (embedder_name, _embedder, writer, dimensions)) in arroy_writers { let dimensions = *dimensions; + let is_being_quantized = embeder_actions + .and_then(|actions| actions.get(*embedder_name).map(|action| action.is_being_quantized)) + .unwrap_or(false); writer.build_and_quantize( wtxn, progress, &mut rng, dimensions, - false, + is_being_quantized, arroy_memory, must_stop_processing, )?; } - index.put_embedding_configs(wtxn, index_embeddings)?; + index.embedding_configs().put_embedding_configs(wtxn, index_embeddings)?; Ok(()) } @@ -140,7 +155,7 @@ pub(super) fn update_index( wtxn: &mut RwTxn<'_>, new_fields_ids_map: FieldIdMapWithMetadata, new_primary_key: Option>, - embedders: EmbeddingConfigs, + embedders: RuntimeEmbedders, field_distribution: std::collections::BTreeMap, document_ids: roaring::RoaringBitmap, ) -> Result<()> { @@ -219,14 +234,36 @@ pub fn write_from_bbqueue( arroy_writers.get(&embedder_id).expect("requested a missing embedder"); let mut embeddings = Embeddings::new(*dimensions); let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding); - if embeddings.append(all_embeddings.to_vec()).is_err() { - return Err(Error::UserError(UserError::InvalidVectorDimensions { - expected: *dimensions, - found: all_embeddings.len(), - })); - } writer.del_items(wtxn, *dimensions, docid)?; - writer.add_items(wtxn, docid, &embeddings)?; + if !all_embeddings.is_empty() { + if embeddings.append(all_embeddings.to_vec()).is_err() { + return Err(Error::UserError(UserError::InvalidVectorDimensions { + expected: *dimensions, + found: all_embeddings.len(), + })); + } + writer.add_items(wtxn, docid, &embeddings)?; + } + } + EntryHeader::ArroySetVector( + asv @ ArroySetVector { docid, embedder_id, extractor_id, .. 
}, + ) => { + let frame = frame_with_header.frame(); + let (_, _, writer, dimensions) = + arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + let embedding = asv.read_all_embeddings_into_vec(frame, aligned_embedding); + + if embedding.is_empty() { + writer.del_item_in_store(wtxn, docid, extractor_id, *dimensions)?; + } else { + if embedding.len() != *dimensions { + return Err(Error::UserError(UserError::InvalidVectorDimensions { + expected: *dimensions, + found: embedding.len(), + })); + } + writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?; + } } } } diff --git a/crates/milli/src/update/new/mod.rs b/crates/milli/src/update/new/mod.rs index 81ff93e54..ffe27ffda 100644 --- a/crates/milli/src/update/new/mod.rs +++ b/crates/milli/src/update/new/mod.rs @@ -1,4 +1,5 @@ -pub use document_change::{Deletion, DocumentChange, Insertion, Update}; +pub use document::DocumentIdentifiers; +pub use document_change::{DocumentChange, Insertion, Update}; pub use indexer::ChannelCongestion; pub use merger::{ merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases, FacetFieldIdsDelta, diff --git a/crates/milli/src/update/new/vector_document.rs b/crates/milli/src/update/new/vector_document.rs index a52dab6a1..b59984248 100644 --- a/crates/milli/src/update/new/vector_document.rs +++ b/crates/milli/src/update/new/vector_document.rs @@ -12,9 +12,9 @@ use super::document::{Document, DocumentFromDb, DocumentFromVersions, Versions}; use super::indexer::de::DeserrRawValue; use crate::constants::RESERVED_VECTORS_FIELD_NAME; use crate::documents::FieldIdMapper; -use crate::index::IndexEmbeddingConfig; +use crate::vector::db::{EmbeddingStatus, IndexEmbeddingConfig}; use crate::vector::parsed_vectors::{RawVectors, RawVectorsError, VectorOrArrayOfVectors}; -use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfigs}; +use crate::vector::{ArroyWrapper, Embedding, RuntimeEmbedders}; use crate::{DocumentId, Index, InternalError, Result, UserError}; #[derive(Serialize)] @@ -109,7 +109,7 @@ impl<'t> VectorDocumentFromDb<'t> { None => None, }; - let embedding_config = index.embedding_configs(rtxn)?; + let embedding_config = index.embedding_configs().embedding_configs(rtxn)?; Ok(Some(Self { docid, embedding_config, index, vectors_field, rtxn, doc_alloc })) } @@ -118,6 +118,7 @@ impl<'t> VectorDocumentFromDb<'t> { &self, embedder_id: u8, config: &IndexEmbeddingConfig, + status: &EmbeddingStatus, ) -> Result> { let reader = ArroyWrapper::new(self.index.vector_arroy, embedder_id, config.config.quantized()); @@ -126,7 +127,7 @@ impl<'t> VectorDocumentFromDb<'t> { Ok(VectorEntry { has_configured_embedder: true, embeddings: Some(Embeddings::FromDb(vectors)), - regenerate: !config.user_provided.contains(self.docid), + regenerate: status.must_regenerate(self.docid), implicit: false, }) } @@ -137,9 +138,9 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> { self.embedding_config .iter() .map(|config| { - let embedder_id = - self.index.embedder_category_id.get(self.rtxn, &config.name)?.unwrap(); - let entry = self.entry_from_db(embedder_id, config)?; + let info = + self.index.embedding_configs().embedder_info(self.rtxn, &config.name)?.unwrap(); + let entry = self.entry_from_db(info.embedder_id, config, &info.embedding_status)?; let config_name = self.doc_alloc.alloc_str(config.name.as_str()); Ok((&*config_name, entry)) }) @@ -156,11 +157,11 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> { } fn vectors_for_key(&self, key: &str) -> Result>> { - Ok(match 
self.index.embedder_category_id.get(self.rtxn, key)? { - Some(embedder_id) => { + Ok(match self.index.embedding_configs().embedder_info(self.rtxn, key)? { + Some(info) => { let config = self.embedding_config.iter().find(|config| config.name == key).unwrap(); - Some(self.entry_from_db(embedder_id, config)?) + Some(self.entry_from_db(info.embedder_id, config, &info.embedding_status)?) } None => match self.vectors_field.as_ref().and_then(|obkv| obkv.get(key)) { Some(embedding_from_doc) => { @@ -222,7 +223,7 @@ fn entry_from_raw_value( pub struct VectorDocumentFromVersions<'doc> { external_document_id: &'doc str, vectors: RawMap<'doc, FxBuildHasher>, - embedders: &'doc EmbeddingConfigs, + embedders: &'doc RuntimeEmbedders, } impl<'doc> VectorDocumentFromVersions<'doc> { @@ -230,7 +231,7 @@ impl<'doc> VectorDocumentFromVersions<'doc> { external_document_id: &'doc str, versions: &Versions<'doc>, bump: &'doc Bump, - embedders: &'doc EmbeddingConfigs, + embedders: &'doc RuntimeEmbedders, ) -> Result> { let document = DocumentFromVersions::new(versions); if let Some(vectors_field) = document.vectors_field()? { @@ -283,7 +284,7 @@ impl<'doc> MergedVectorDocument<'doc> { db_fields_ids_map: &'doc Mapper, versions: &Versions<'doc>, doc_alloc: &'doc Bump, - embedders: &'doc EmbeddingConfigs, + embedders: &'doc RuntimeEmbedders, ) -> Result> { let db = VectorDocumentFromDb::new(docid, index, rtxn, db_fields_ids_map, doc_alloc)?; let new_doc = @@ -295,7 +296,7 @@ impl<'doc> MergedVectorDocument<'doc> { external_document_id: &'doc str, versions: &Versions<'doc>, doc_alloc: &'doc Bump, - embedders: &'doc EmbeddingConfigs, + embedders: &'doc RuntimeEmbedders, ) -> Result> { let Some(new_doc) = VectorDocumentFromVersions::new(external_document_id, versions, doc_alloc, embedders)? 
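The `regenerate` flag read above no longer derives from a plain `user_provided` bitmap: `EmbeddingStatus` answers both `is_user_provided` and `must_regenerate` per docid. A minimal sketch of that bookkeeping, assuming two roaring bitmaps and hypothetical field names (the real type lives in `crate::vector::db` and its layout may differ):

```rust
use roaring::RoaringBitmap;

/// Sketch only: `user_provided` holds docids whose embeddings came from `_vectors`;
/// the second bitmap marks docids whose `regenerate` flag differs from the default
/// implied by `user_provided`.
#[derive(Default)]
struct Status {
    user_provided: RoaringBitmap,
    regenerate_differs_from_default: RoaringBitmap,
}

impl Status {
    /// By default, user-provided embeddings are not regenerated and autogenerated
    /// ones are; the second bitmap flips that default per docid.
    fn must_regenerate(&self, docid: u32) -> bool {
        let default_regenerate = !self.user_provided.contains(docid);
        default_regenerate ^ self.regenerate_differs_from_default.contains(docid)
    }
}

fn main() {
    let mut status = Status::default();
    status.user_provided.insert(0); // docid 0: embedding supplied in `_vectors`
    status.regenerate_differs_from_default.insert(1); // docid 1: regeneration disabled

    assert!(!status.must_regenerate(0)); // user-provided: kept as-is by default
    assert!(!status.must_regenerate(1)); // autogenerated, but `regenerate: false`
    assert!(status.must_regenerate(2)); // plain autogenerated docid
}
```

With an encoding along these lines, the second bitmap stays empty in the common case (user-provided embeddings are never regenerated, autogenerated ones always are), which keeps the status cheap to store and to diff.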
diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index bb965ba69..911f51865 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -7,35 +7,43 @@ use std::sync::Arc; use charabia::{Normalize, Tokenizer, TokenizerBuilder}; use deserr::{DeserializeError, Deserr}; use itertools::{merge_join_by, EitherOrBoth, Itertools}; -use roaring::RoaringBitmap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use time::OffsetDateTime; +use super::chat::ChatSearchParams; use super::del_add::{DelAdd, DelAddOperation}; use super::index_documents::{IndexDocumentsConfig, Transform}; -use super::IndexerConfig; +use super::{ChatSettings, IndexerConfig}; use crate::attribute_patterns::PatternMatch; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::criterion::Criterion; use crate::disabled_typos_terms::DisabledTyposTerms; -use crate::error::UserError; +use crate::error::UserError::{self, InvalidChatSettingsDocumentTemplateMaxBytes}; use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; use crate::filterable_attributes_rules::match_faceted_field; use crate::index::{ - IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO, + ChatConfig, PrefixSearch, SearchParameters, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, }; use crate::order_by_map::OrderByMap; -use crate::prompt::default_max_bytes; +use crate::progress::{EmbedderStats, Progress}; +use crate::prompt::{default_max_bytes, default_template_text, PromptData}; use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; +use crate::update::new::indexer::reindex; use crate::update::{IndexDocuments, UpdateIndexingStep}; +use crate::vector::db::{FragmentConfigs, IndexEmbeddingConfig}; +use crate::vector::json_template::JsonTemplate; use crate::vector::settings::{ - EmbedderAction, EmbedderSource, EmbeddingSettings, NestingContext, ReindexAction, - SubEmbeddingSettings, WriteBackToDocuments, + EmbedderAction, EmbedderSource, EmbeddingSettings, EmbeddingValidationContext, NestingContext, + ReindexAction, SubEmbeddingSettings, WriteBackToDocuments, +}; +use crate::vector::{ + Embedder, EmbeddingConfig, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment, +}; +use crate::{ + ChannelCongestion, FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result, }; -use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; -use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result}; #[derive(Debug, Clone, PartialEq, Eq, Copy)] pub enum Setting { @@ -185,6 +193,7 @@ pub struct Settings<'a, 't, 'i> { localized_attributes_rules: Setting>, prefix_search: Setting, facet_search: Setting, + chat: Setting, } impl<'a, 't, 'i> Settings<'a, 't, 'i> { @@ -223,6 +232,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { localized_attributes_rules: Setting::NotSet, prefix_search: Setting::NotSet, facet_search: Setting::NotSet, + chat: Setting::NotSet, indexer_config, } } @@ -453,9 +463,17 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.facet_search = Setting::Reset; } + pub fn set_chat(&mut self, value: ChatSettings) { + self.chat = Setting::Set(value); + } + + pub fn reset_chat(&mut self) { + self.chat = Setting::Reset; + } + #[tracing::instrument( level = "trace" - skip(self, progress_callback, should_abort, settings_diff), + skip(self, progress_callback, should_abort, settings_diff, embedder_stats), target = "indexing::documents" )] fn reindex( @@ -463,6 +481,7 
@@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { progress_callback: &FP, should_abort: &FA, settings_diff: InnerIndexSettingsDiff, + embedder_stats: &Arc, ) -> Result<()> where FP: Fn(UpdateIndexingStep) + Sync, @@ -494,6 +513,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { IndexDocumentsConfig::default(), &progress_callback, &should_abort, + embedder_stats, )?; indexing_builder.execute_raw(output)?; @@ -884,7 +904,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { disabled_typos_terms.disable_on_numbers = disable_on_numbers; } Setting::Reset => { - self.index.delete_disabled_typos_terms(self.wtxn)?; disabled_typos_terms.disable_on_numbers = DisabledTyposTerms::default().disable_on_numbers; } @@ -1027,22 +1046,27 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { match std::mem::take(&mut self.embedder_settings) { Setting::Set(configs) => self.update_embedding_configs_set(configs), Setting::Reset => { + let embedders = self.index.embedding_configs(); // all vectors should be written back to documents - let old_configs = self.index.embedding_configs(self.wtxn)?; + let old_configs = embedders.embedding_configs(self.wtxn)?; let remove_all: Result> = old_configs .into_iter() - .map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> { - let embedder_id = - self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or( - crate::InternalError::DatabaseMissingEntry { - db_name: crate::index::db_name::VECTOR_EMBEDDER_CATEGORY_ID, - key: None, - }, - )?; + .map(|IndexEmbeddingConfig { name, config, fragments: _ }| -> Result<_> { + let embedder_info = embedders.embedder_info(self.wtxn, &name)?.ok_or( + crate::InternalError::DatabaseMissingEntry { + db_name: crate::index::db_name::VECTOR_EMBEDDER_CATEGORY_ID, + key: None, + }, + )?; Ok(( name, EmbedderAction::with_write_back( - WriteBackToDocuments { embedder_id, user_provided }, + WriteBackToDocuments { + embedder_id: embedder_info.embedder_id, + user_provided: embedder_info + .embedding_status + .into_user_provided(), + }, config.quantized(), ), )) @@ -1052,7 +1076,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let remove_all = remove_all?; self.index.embedder_category_id.clear(self.wtxn)?; - self.index.delete_embedding_configs(self.wtxn)?; + embedders.delete_embedding_configs(self.wtxn)?; Ok(remove_all) } Setting::NotSet => Ok(Default::default()), @@ -1064,12 +1088,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { configs: BTreeMap>, ) -> Result> { use crate::vector::settings::SettingsDiff; - - let old_configs = self.index.embedding_configs(self.wtxn)?; - let old_configs: BTreeMap = old_configs + let embedders = self.index.embedding_configs(); + let old_configs = embedders.embedding_configs(self.wtxn)?; + let old_configs: BTreeMap = old_configs .into_iter() - .map(|IndexEmbeddingConfig { name, config, user_provided }| { - (name, (config.into(), user_provided)) + .map(|IndexEmbeddingConfig { name, config, fragments }| { + (name, (config.into(), fragments)) }) .collect(); let mut updated_configs = BTreeMap::new(); @@ -1080,71 +1104,111 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { { match joined { // updated config - EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => { + EitherOrBoth::Both((name, (old, mut fragments)), (_, new)) => { let was_quantized = old.binary_quantized.set().unwrap_or_default(); let settings_diff = SettingsDiff::from_settings(&name, old, new)?; match settings_diff { SettingsDiff::Remove => { + let info = embedders.remove_embedder(self.wtxn, &name)?.ok_or( + crate::InternalError::DatabaseMissingEntry { + db_name: 
crate::index::db_name::VECTOR_EMBEDDER_CATEGORY_ID, + key: None, + }, + )?; tracing::debug!( embedder = name, - user_provided = user_provided.len(), + user_provided = info.embedding_status.user_provided_docids().len(), "removing embedder" ); - let embedder_id = - self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or( - crate::InternalError::DatabaseMissingEntry { - db_name: crate::index::db_name::VECTOR_EMBEDDER_CATEGORY_ID, - key: None, - }, - )?; - // free id immediately - self.index.embedder_category_id.delete(self.wtxn, &name)?; embedder_actions.insert( name, EmbedderAction::with_write_back( - WriteBackToDocuments { embedder_id, user_provided }, + WriteBackToDocuments { + embedder_id: info.embedder_id, + user_provided: info.embedding_status.into_user_provided(), + }, was_quantized, ), ); } SettingsDiff::Reindex { action, updated_settings, quantize } => { - tracing::debug!( - embedder = name, - user_provided = user_provided.len(), - ?action, - "reindex embedder" - ); - embedder_actions.insert( - name.clone(), + let mut remove_fragments = None; + let updated_settings = Setting::Set(updated_settings); + if let ReindexAction::RegenerateFragments(regenerate_fragments) = + &action + { + let it = regenerate_fragments + .iter() + .filter(|(_, action)| { + matches!( + action, + crate::vector::settings::RegenerateFragment::Remove + ) + }) + .map(|(name, _)| name.as_str()); + + remove_fragments = fragments.remove_fragments(it); + + let it = regenerate_fragments + .iter() + .filter(|(_, action)| { + matches!( + action, + crate::vector::settings::RegenerateFragment::Add + ) + }) + .map(|(name, _)| name.clone()); + fragments.add_new_fragments(it)?; + } else { + // needs full reindex of fragments + fragments = FragmentConfigs::new(); + fragments.add_new_fragments( + crate::vector::settings::fragments_from_settings( + &updated_settings, + ), + )?; + } + tracing::debug!(embedder = name, ?action, "reindex embedder"); + + let embedder_action = EmbedderAction::with_reindex(action, was_quantized) - .with_is_being_quantized(quantize), - ); - let new = - validate_embedding_settings(Setting::Set(updated_settings), &name)?; - updated_configs.insert(name, (new, user_provided)); + .with_is_being_quantized(quantize); + + let embedder_action = if let Some(remove_fragments) = remove_fragments { + embedder_action.with_remove_fragments(remove_fragments) + } else { + embedder_action + }; + + embedder_actions.insert(name.clone(), embedder_action); + let new = validate_embedding_settings( + updated_settings, + &name, + EmbeddingValidationContext::FullSettings, + )?; + updated_configs.insert(name, (new, fragments)); } SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => { - tracing::debug!( - embedder = name, - user_provided = user_provided.len(), - "update without reindex embedder" - ); - let new = - validate_embedding_settings(Setting::Set(updated_settings), &name)?; + tracing::debug!(embedder = name, "update without reindex embedder"); + let new = validate_embedding_settings( + Setting::Set(updated_settings), + &name, + EmbeddingValidationContext::FullSettings, + )?; if quantize { embedder_actions.insert( name.clone(), EmbedderAction::default().with_is_being_quantized(true), ); } - updated_configs.insert(name, (new, user_provided)); + updated_configs.insert(name, (new, fragments)); } } } // unchanged config - EitherOrBoth::Left((name, (setting, user_provided))) => { + EitherOrBoth::Left((name, (setting, fragments))) => { tracing::debug!(embedder = name, "unchanged embedder"); - 
updated_configs.insert(name, (Setting::Set(setting), user_provided)); + updated_configs.insert(name, (Setting::Set(setting), fragments)); } // new config EitherOrBoth::Right((name, mut setting)) => { @@ -1154,52 +1218,51 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { crate::vector::settings::EmbeddingSettings::apply_default_openai_model( &mut setting, ); - let setting = validate_embedding_settings(setting, &name)?; + let setting = validate_embedding_settings( + setting, + &name, + EmbeddingValidationContext::FullSettings, + )?; embedder_actions.insert( name.clone(), EmbedderAction::with_reindex(ReindexAction::FullReindex, false), ); - updated_configs.insert(name, (setting, RoaringBitmap::new())); + let mut fragments = FragmentConfigs::new(); + fragments.add_new_fragments( + crate::vector::settings::fragments_from_settings(&setting), + )?; + updated_configs.insert(name, (setting, fragments)); } } } - let mut free_indices: [bool; u8::MAX as usize] = [true; u8::MAX as usize]; - for res in self.index.embedder_category_id.iter(self.wtxn)? { - let (_name, id) = res?; - free_indices[id as usize] = false; - } - let mut free_indices = free_indices.iter_mut().enumerate(); - let mut find_free_index = - move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); - for (name, action) in embedder_actions.iter() { - // ignore actions that are not possible for a new embedder - if matches!(action.reindex(), Some(ReindexAction::FullReindex)) - && self.index.embedder_category_id.get(self.wtxn, name)?.is_none() - { - let id = - find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?; - tracing::debug!(embedder = name, id, "assigning free id to new embedder"); - self.index.embedder_category_id.put(self.wtxn, name, &id)?; - } - } + embedders.add_new_embedders( + self.wtxn, + embedder_actions + .iter() + // ignore actions that are not possible for a new embedder, most critically deleted embedders + .filter(|(_, action)| matches!(action.reindex(), Some(ReindexAction::FullReindex))) + .map(|(name, _)| name.as_str()), + updated_configs.len(), + )?; + let updated_configs: Vec = updated_configs .into_iter() - .filter_map(|(name, (config, user_provided))| match config { + .filter_map(|(name, (config, fragments))| match config { Setting::Set(config) => { - Some(IndexEmbeddingConfig { name, config: config.into(), user_provided }) + Some(IndexEmbeddingConfig { name, config: config.into(), fragments }) } Setting::Reset => None, Setting::NotSet => Some(IndexEmbeddingConfig { name, config: EmbeddingSettings::default().into(), - user_provided, + fragments: Default::default(), }), }) .collect(); if updated_configs.is_empty() { - self.index.delete_embedding_configs(self.wtxn)?; + embedders.delete_embedding_configs(self.wtxn)?; } else { - self.index.put_embedding_configs(self.wtxn, updated_configs)?; + embedders.put_embedding_configs(self.wtxn, updated_configs)?; } Ok(embedder_actions) } @@ -1239,7 +1302,118 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { Ok(()) } - pub fn execute(mut self, progress_callback: FP, should_abort: FA) -> Result<()> + fn update_chat_config(&mut self) -> Result { + match &mut self.chat { + Setting::Set(ChatSettings { + description: new_description, + document_template: new_document_template, + document_template_max_bytes: new_document_template_max_bytes, + search_parameters: new_search_parameters, + }) => { + let ChatConfig { description, prompt, search_parameters } = + self.index.chat_config(self.wtxn)?; + + let description = match new_description { + Setting::Set(new) 
=> new.clone(), + Setting::Reset => Default::default(), + Setting::NotSet => description, + }; + + let prompt = PromptData { + template: match new_document_template { + Setting::Set(new) => new.clone(), + Setting::Reset => default_template_text().to_string(), + Setting::NotSet => prompt.template.clone(), + }, + max_bytes: match new_document_template_max_bytes { + Setting::Set(m) => Some( + NonZeroUsize::new(*m) + .ok_or(InvalidChatSettingsDocumentTemplateMaxBytes)?, + ), + Setting::Reset => Some(default_max_bytes()), + Setting::NotSet => prompt.max_bytes, + }, + }; + + let search_parameters = match new_search_parameters { + Setting::Set(sp) => { + let ChatSearchParams { + hybrid, + limit, + sort, + distinct, + matching_strategy, + attributes_to_search_on, + ranking_score_threshold, + } = sp; + + SearchParameters { + hybrid: match hybrid { + Setting::Set(hybrid) => Some(crate::index::HybridQuery { + semantic_ratio: *hybrid.semantic_ratio, + embedder: hybrid.embedder.clone(), + }), + Setting::Reset => None, + Setting::NotSet => search_parameters.hybrid.clone(), + }, + limit: match limit { + Setting::Set(limit) => Some(*limit), + Setting::Reset => None, + Setting::NotSet => search_parameters.limit, + }, + sort: match sort { + Setting::Set(sort) => Some(sort.clone()), + Setting::Reset => None, + Setting::NotSet => search_parameters.sort.clone(), + }, + distinct: match distinct { + Setting::Set(distinct) => Some(distinct.clone()), + Setting::Reset => None, + Setting::NotSet => search_parameters.distinct.clone(), + }, + matching_strategy: match matching_strategy { + Setting::Set(matching_strategy) => Some(*matching_strategy), + Setting::Reset => None, + Setting::NotSet => search_parameters.matching_strategy, + }, + attributes_to_search_on: match attributes_to_search_on { + Setting::Set(attributes_to_search_on) => { + Some(attributes_to_search_on.clone()) + } + Setting::Reset => None, + Setting::NotSet => { + search_parameters.attributes_to_search_on.clone() + } + }, + ranking_score_threshold: match ranking_score_threshold { + Setting::Set(rst) => Some(*rst), + Setting::Reset => None, + Setting::NotSet => search_parameters.ranking_score_threshold, + }, + } + } + Setting::Reset => Default::default(), + Setting::NotSet => search_parameters, + }; + + self.index.put_chat_config( + self.wtxn, + &ChatConfig { description, prompt, search_parameters }, + )?; + + Ok(true) + } + Setting::Reset => self.index.delete_chat_config(self.wtxn).map_err(Into::into), + Setting::NotSet => Ok(false), + } + } + + pub fn legacy_execute( + mut self, + progress_callback: FP, + should_abort: FA, + embedder_stats: Arc, + ) -> Result<()> where FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, @@ -1276,6 +1450,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.update_facet_search()?; self.update_localized_attributes_rules()?; self.update_disabled_typos_terms()?; + self.update_chat_config()?; let embedding_config_updates = self.update_embedding_configs()?; @@ -1296,11 +1471,113 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { ); if inner_settings_diff.any_reindexing_needed() { - self.reindex(&progress_callback, &should_abort, inner_settings_diff)?; + self.reindex(&progress_callback, &should_abort, inner_settings_diff, &embedder_stats)?; } Ok(()) } + + pub fn execute<'indexer, MSP>( + mut self, + must_stop_processing: &'indexer MSP, + progress: &'indexer Progress, + embedder_stats: Arc, + ) -> Result> + where + MSP: Fn() -> bool + Sync, + { + // force the old indexer if the environment says so + if 
self.indexer_config.experimental_no_edition_2024_for_settings { + return self + .legacy_execute( + |indexing_step| tracing::debug!(update = ?indexing_step), + must_stop_processing, + embedder_stats, + ) + .map(|_| None); + } + + // only use the new indexer when only the embedder possibly changed + if let Self { + searchable_fields: Setting::NotSet, + displayed_fields: Setting::NotSet, + filterable_fields: Setting::NotSet, + sortable_fields: Setting::NotSet, + criteria: Setting::NotSet, + stop_words: Setting::NotSet, + non_separator_tokens: Setting::NotSet, + separator_tokens: Setting::NotSet, + dictionary: Setting::NotSet, + distinct_field: Setting::NotSet, + synonyms: Setting::NotSet, + primary_key: Setting::NotSet, + authorize_typos: Setting::NotSet, + min_word_len_two_typos: Setting::NotSet, + min_word_len_one_typo: Setting::NotSet, + exact_words: Setting::NotSet, + exact_attributes: Setting::NotSet, + max_values_per_facet: Setting::NotSet, + sort_facet_values_by: Setting::NotSet, + pagination_max_total_hits: Setting::NotSet, + proximity_precision: Setting::NotSet, + embedder_settings: _, + search_cutoff: Setting::NotSet, + localized_attributes_rules: Setting::NotSet, + prefix_search: Setting::NotSet, + facet_search: Setting::NotSet, + disable_on_numbers: Setting::NotSet, + chat: Setting::NotSet, + wtxn: _, + index: _, + indexer_config: _, + } = &self + { + self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; + + let old_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?; + + // Update index settings + let embedding_config_updates = self.update_embedding_configs()?; + + let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?; + + let primary_key_id = self + .index + .primary_key(self.wtxn)? + .and_then(|name| new_inner_settings.fields_ids_map.id(name)); + let settings_update_only = true; + let inner_settings_diff = InnerIndexSettingsDiff::new( + old_inner_settings, + new_inner_settings, + primary_key_id, + embedding_config_updates, + settings_update_only, + ); + + if self.index.number_of_documents(self.wtxn)? > 0 { + reindex( + self.wtxn, + self.index, + &self.indexer_config.thread_pool, + self.indexer_config.grenad_parameters(), + &inner_settings_diff, + must_stop_processing, + progress, + embedder_stats, + ) + .map(Some) + } else { + Ok(None) + } + } else { + self.legacy_execute( + |indexing_step| tracing::debug!(update = ?indexing_step), + must_stop_processing, + embedder_stats, + ) + .map(|_| None) + } + } } pub struct InnerIndexSettingsDiff { @@ -1312,6 +1589,7 @@ pub struct InnerIndexSettingsDiff { /// The set of only the additional searchable fields. /// If any other searchable field has been modified, is set to None. pub(crate) only_additional_fields: Option>, + fragment_diffs: BTreeMap, usize)>>, // Cache the check to see if all the stop_words, allowed_separators, dictionary, // exact_attributes, proximity_precision are different. @@ -1380,13 +1658,13 @@ impl InnerIndexSettingsDiff { // if the user-defined searchables changed, then we need to reindex prompts. 
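
Aside: the tri-state `Setting` merge rule that `update_chat_config` applies field by field, and that lets `execute` take the embedder-only fast path when every other field destructures to `Setting::NotSet`, can be summed up in a small standalone sketch (toy types, not the milli definitions):

```rust
// Minimal sketch of milli's tri-state settings pattern:
// `Set` overrides, `Reset` restores the default, `NotSet` keeps the stored value.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

impl<T> Setting<T> {
    /// Fold a requested change into the currently stored value.
    fn merge(self, current: T, default: T) -> T {
        match self {
            Setting::Set(new) => new,
            Setting::Reset => default,
            Setting::NotSet => current,
        }
    }
}

fn main() {
    let stored_limit = 20;
    assert_eq!(Setting::Set(50).merge(stored_limit, 10), 50); // override
    assert_eq!(Setting::<i32>::Reset.merge(stored_limit, 10), 10); // back to default
    assert_eq!(Setting::<i32>::NotSet.merge(stored_limit, 10), 20); // untouched
}
```
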
if cache_user_defined_searchables { - for (embedder_name, (config, _, _quantized)) in - new_settings.embedding_configs.inner_as_ref() - { - let was_quantized = - old_settings.embedding_configs.get(embedder_name).is_some_and(|conf| conf.2); + for (embedder_name, runtime) in new_settings.runtime_embedders.inner_as_ref() { + let was_quantized = old_settings + .runtime_embedders + .get(embedder_name) + .is_some_and(|conf| conf.is_quantized); // skip embedders that don't use document templates - if !config.uses_document_template() { + if !runtime.embedder.uses_document_template() { continue; } @@ -1399,22 +1677,86 @@ impl InnerIndexSettingsDiff { was_quantized, )); } - std::collections::btree_map::Entry::Occupied(entry) => { + std::collections::btree_map::Entry::Occupied(mut entry) => { + // future-proofing, make sure to destructure here so that any new field is taken into account in this case + // case in point: adding `remove_fragments` was detected. let EmbedderAction { was_quantized: _, is_being_quantized: _, - write_back: _, // We are deleting this embedder, so no point in regeneration - reindex: _, // We are already fully reindexing - } = entry.get(); + write_back, // We are deleting this embedder, so no point in regeneration + reindex, + remove_fragments: _, + } = entry.get_mut(); + + // fixup reindex to make sure we regenerate all fragments + *reindex = match reindex.take() { + Some(reindex) => Some(reindex), // We are at least regenerating prompts + None => { + if write_back.is_none() { + Some(ReindexAction::RegeneratePrompts) // quantization case + } else { + None + } + } + }; } }; } } + // build the fragment diffs + let mut fragment_diffs = BTreeMap::new(); + for (embedder_name, embedder_action) in &embedding_config_updates { + let Some(new_embedder) = new_settings.runtime_embedders.get(embedder_name) else { + continue; + }; + let regenerate_fragments = + if let Some(ReindexAction::RegenerateFragments(regenerate_fragments)) = + embedder_action.reindex() + { + either::Either::Left( + regenerate_fragments + .iter() + .filter(|(_, action)| { + !matches!( + action, + crate::vector::settings::RegenerateFragment::Remove + ) + }) + .map(|(name, _)| name), + ) + } else { + either::Either::Right( + new_embedder.fragments().iter().map(|fragment| &fragment.name), + ) + }; + + let old_embedder = old_settings.runtime_embedders.get(embedder_name); + + let mut fragments = Vec::new(); + for fragment_name in regenerate_fragments { + let Ok(new) = new_embedder + .fragments() + .binary_search_by_key(&fragment_name, |fragment| &fragment.name) + else { + continue; + }; + let old = old_embedder.as_ref().and_then(|old_embedder| { + old_embedder + .fragments() + .binary_search_by_key(&fragment_name, |fragment| &fragment.name) + .ok() + }); + fragments.push((old, new)); + } + fragment_diffs.insert(embedder_name.clone(), fragments); + } + InnerIndexSettingsDiff { old: old_settings, new: new_settings, primary_key_id, + fragment_diffs, embedding_config_updates, settings_update_only, only_additional_fields, @@ -1559,7 +1901,8 @@ pub(crate) struct InnerIndexSettings { pub exact_attributes: HashSet, pub disabled_typos_terms: DisabledTyposTerms, pub proximity_precision: ProximityPrecision, - pub embedding_configs: EmbeddingConfigs, + pub runtime_embedders: RuntimeEmbedders, + pub embedder_category_id: HashMap, pub geo_fields_ids: Option<(FieldId, FieldId)>, pub prefix_search: PrefixSearch, pub facet_search: bool, @@ -1569,7 +1912,7 @@ impl InnerIndexSettings { pub fn from_index( index: &Index, rtxn: 
&heed::RoTxn<'_>, - embedding_configs: Option, + runtime_embedders: Option, ) -> Result { let stop_words = index.stop_words(rtxn)?; let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap()); @@ -1578,10 +1921,15 @@ impl InnerIndexSettings { let mut fields_ids_map = index.fields_ids_map(rtxn)?; let exact_attributes = index.exact_attributes_ids(rtxn)?; let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default(); - let embedding_configs = match embedding_configs { + let runtime_embedders = match runtime_embedders { Some(embedding_configs) => embedding_configs, - None => embedders(index.embedding_configs(rtxn)?)?, + None => embedders(index.embedding_configs().embedding_configs(rtxn)?)?, }; + let embedder_category_id = index + .embedding_configs() + .iter_embedder_id(rtxn)? + .map(|r| r.map(|(k, v)| (k.to_string(), v))) + .collect::>()?; let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default(); let facet_search = index.facet_search(rtxn)?; let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) { @@ -1620,7 +1968,8 @@ impl InnerIndexSettings { sortable_fields, exact_attributes, proximity_precision, - embedding_configs, + runtime_embedders, + embedder_category_id, geo_fields_ids, prefix_search, facet_search, @@ -1662,28 +2011,49 @@ impl InnerIndexSettings { } } -fn embedders(embedding_configs: Vec) -> Result { +fn embedders(embedding_configs: Vec) -> Result { let res: Result<_> = embedding_configs .into_iter() .map( |IndexEmbeddingConfig { name, config: EmbeddingConfig { embedder_options, prompt, quantized }, - .. + fragments, }| { - let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?); + let document_template = prompt.try_into().map_err(crate::Error::from)?; - let embedder = Arc::new( + let embedder = // cache_cap: no cache needed for indexing purposes - Embedder::new(embedder_options.clone(), 0) + Arc::new(Embedder::new(embedder_options.clone(), 0) .map_err(crate::vector::Error::from) - .map_err(crate::Error::from)?, - ); - Ok((name, (embedder, prompt, quantized.unwrap_or_default()))) + .map_err(crate::Error::from)?); + + let fragments = fragments + .into_inner() + .into_iter() + .map(|fragment| { + let template = JsonTemplate::new( + embedder_options.fragment(&fragment.name).unwrap().clone(), + ) + .unwrap(); + + RuntimeFragment { name: fragment.name, id: fragment.id, template } + }) + .collect(); + + Ok(( + name, + Arc::new(RuntimeEmbedder::new( + embedder, + document_template, + fragments, + quantized.unwrap_or_default(), + )), + )) }, ) .collect(); - res.map(EmbeddingConfigs::new) + res.map(RuntimeEmbedders::new) } fn validate_prompt( @@ -1720,6 +2090,7 @@ fn validate_prompt( pub fn validate_embedding_settings( settings: Setting, name: &str, + context: EmbeddingValidationContext, ) -> Result> { let Setting::Set(settings) = settings else { return Ok(settings) }; let EmbeddingSettings { @@ -1732,6 +2103,8 @@ pub fn validate_embedding_settings( document_template, document_template_max_bytes, url, + indexing_fragments, + search_fragments, request, response, search_embedder, @@ -1758,9 +2131,106 @@ pub fn validate_embedding_settings( })?; } + // used below + enum WithFragments { + Yes { + indexing_fragments: BTreeMap, + search_fragments: BTreeMap, + }, + No, + Maybe, + } + + let with_fragments = { + let has_reset = matches!(indexing_fragments, Setting::Reset) + || matches!(search_fragments, Setting::Reset); + let indexing_fragments: BTreeMap<_, _> = indexing_fragments + .as_ref() + .set() + .iter() + .flat_map(|map| 
map.iter()) + .filter_map(|(name, fragment)| { + Some((name.clone(), fragment.as_ref().map(|fragment| fragment.value.clone())?)) + }) + .collect(); + let search_fragments: BTreeMap<_, _> = search_fragments + .as_ref() + .set() + .iter() + .flat_map(|map| map.iter()) + .filter_map(|(name, fragment)| { + Some((name.clone(), fragment.as_ref().map(|fragment| fragment.value.clone())?)) + }) + .collect(); + + let has_fragments = !indexing_fragments.is_empty() || !search_fragments.is_empty(); + + if context == EmbeddingValidationContext::FullSettings { + let are_fragments_inconsistent = + indexing_fragments.is_empty() ^ search_fragments.is_empty(); + if are_fragments_inconsistent { + return Err(crate::vector::error::NewEmbedderError::rest_inconsistent_fragments( + indexing_fragments.is_empty(), + indexing_fragments, + search_fragments, + )) + .map_err(|error| crate::UserError::VectorEmbeddingError(error.into()).into()); + } + } + if has_fragments { + if context == EmbeddingValidationContext::SettingsPartialUpdate + && matches!(document_template, Setting::Set(_)) + { + return Err( + crate::vector::error::NewEmbedderError::rest_document_template_and_fragments( + indexing_fragments.len(), + search_fragments.len(), + ), + ) + .map_err(|error| crate::UserError::VectorEmbeddingError(error.into()).into()); + } + WithFragments::Yes { indexing_fragments, search_fragments } + } else if has_reset || context == EmbeddingValidationContext::FullSettings { + WithFragments::No + } else { + // if we are working with partial settings, the user could have changed only the `request` and not given again the fragments + WithFragments::Maybe + } + }; if let Some(request) = request.as_ref().set() { - let request = crate::vector::rest::Request::new(request.to_owned()) - .map_err(|error| crate::UserError::VectorEmbeddingError(error.into()))?; + let request = match with_fragments { + WithFragments::Yes { indexing_fragments, search_fragments } => { + crate::vector::rest::RequestData::new( + request.to_owned(), + indexing_fragments, + search_fragments, + ) + .map_err(|error| crate::UserError::VectorEmbeddingError(error.into())) + } + WithFragments::No => crate::vector::rest::RequestData::new( + request.to_owned(), + Default::default(), + Default::default(), + ) + .map_err(|error| crate::UserError::VectorEmbeddingError(error.into())), + WithFragments::Maybe => { + let mut indexing_fragments = BTreeMap::new(); + indexing_fragments.insert("test".to_string(), serde_json::json!("test")); + crate::vector::rest::RequestData::new( + request.to_owned(), + indexing_fragments, + Default::default(), + ) + .or_else(|_| { + crate::vector::rest::RequestData::new( + request.to_owned(), + Default::default(), + Default::default(), + ) + }) + .map_err(|error| crate::UserError::VectorEmbeddingError(error.into())) + } + }?; if let Some(response) = response.as_ref().set() { crate::vector::rest::Response::new(response.to_owned(), &request) .map_err(|error| crate::UserError::VectorEmbeddingError(error.into()))?; @@ -1779,6 +2249,8 @@ pub fn validate_embedding_settings( document_template, document_template_max_bytes, url, + indexing_fragments, + search_fragments, request, response, search_embedder, @@ -1798,6 +2270,8 @@ pub fn validate_embedding_settings( &dimensions, &api_key, &url, + &indexing_fragments, + &search_fragments, &request, &response, &document_template, @@ -1876,6 +2350,8 @@ pub fn validate_embedding_settings( &embedder.dimensions, &embedder.api_key, &embedder.url, + &embedder.indexing_fragments, + &embedder.search_fragments, 
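
The fragment validation above boils down to two rules; here is a condensed, standalone restatement (illustrative function, not the milli API):

```rust
use std::collections::BTreeMap;

// Sketch of the two consistency rules enforced by `validate_embedding_settings`:
// fragments must come in matched pairs, and they exclude a document template.
fn check_fragments(
    indexing: &BTreeMap<String, serde_json::Value>,
    search: &BTreeMap<String, serde_json::Value>,
    has_document_template: bool,
) -> Result<(), String> {
    // Rule 1: `indexingFragments` and `searchFragments` are either both empty or both declared.
    if indexing.is_empty() ^ search.is_empty() {
        return Err("inconsistent fragments: declare both or neither".into());
    }
    // Rule 2: fragments replace the document template, so passing both is an error.
    if !(indexing.is_empty() && search.is_empty()) && has_document_template {
        return Err("cannot pass both fragments and a document template".into());
    }
    Ok(())
}

fn main() {
    let mut indexing = BTreeMap::new();
    indexing.insert("basic".to_string(), serde_json::json!("example fragment value"));
    let search = BTreeMap::new();
    // One side declared, the other empty: rejected.
    assert!(check_fragments(&indexing, &search, false).is_err());
}
```
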
&embedder.request, &embedder.response, &embedder.document_template, @@ -1931,6 +2407,8 @@ pub fn validate_embedding_settings( &embedder.dimensions, &embedder.api_key, &embedder.url, + &embedder.indexing_fragments, + &embedder.search_fragments, &embedder.request, &embedder.response, &embedder.document_template, @@ -1963,6 +2441,8 @@ pub fn validate_embedding_settings( document_template, document_template_max_bytes, url, + indexing_fragments, + search_fragments, request, response, search_embedder, @@ -1990,6 +2470,81 @@ fn deserialize_sub_embedder( } } +/// Implement this trait for the settings delta type. +/// This is used in the new settings update flow and will allow to easily replace the old settings delta type: `InnerIndexSettingsDiff`. +pub trait SettingsDelta { + fn new_embedders(&self) -> &RuntimeEmbedders; + fn old_embedders(&self) -> &RuntimeEmbedders; + fn new_embedder_category_id(&self) -> &HashMap; + fn embedder_actions(&self) -> &BTreeMap; + fn try_for_each_fragment_diff( + &self, + embedder_name: &str, + for_each: F, + ) -> std::result::Result<(), E> + where + F: FnMut(FragmentDiff) -> std::result::Result<(), E>; + fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata; +} + +pub struct FragmentDiff<'a> { + pub old: Option<&'a RuntimeFragment>, + pub new: &'a RuntimeFragment, +} + +impl SettingsDelta for InnerIndexSettingsDiff { + fn new_embedders(&self) -> &RuntimeEmbedders { + &self.new.runtime_embedders + } + + fn old_embedders(&self) -> &RuntimeEmbedders { + &self.old.runtime_embedders + } + + fn new_embedder_category_id(&self) -> &HashMap { + &self.new.embedder_category_id + } + + fn embedder_actions(&self) -> &BTreeMap { + &self.embedding_config_updates + } + + fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata { + &self.new.fields_ids_map + } + + fn try_for_each_fragment_diff( + &self, + embedder_name: &str, + mut for_each: F, + ) -> std::result::Result<(), E> + where + F: FnMut(FragmentDiff) -> std::result::Result<(), E>, + { + let Some(fragment_diff) = self.fragment_diffs.get(embedder_name) else { return Ok(()) }; + for (old, new) in fragment_diff { + let Some(new_runtime) = self.new.runtime_embedders.get(embedder_name) else { + continue; + }; + + let new = new_runtime.fragments().get(*new).unwrap(); + + match old { + Some(old) => { + if let Some(old_runtime) = self.old.runtime_embedders.get(embedder_name) { + let old = &old_runtime.fragments().get(*old).unwrap(); + for_each(FragmentDiff { old: Some(old), new })?; + } else { + for_each(FragmentDiff { old: None, new })?; + } + } + None => for_each(FragmentDiff { old: None, new })?, + }; + } + Ok(()) + } +} + #[cfg(test)] #[path = "test_settings.rs"] mod tests; diff --git a/crates/milli/src/update/test_settings.rs b/crates/milli/src/update/test_settings.rs index 1adb96366..59e8d9ff1 100644 --- a/crates/milli/src/update/test_settings.rs +++ b/crates/milli/src/update/test_settings.rs @@ -897,6 +897,7 @@ fn test_correct_settings_init() { prefix_search, facet_search, disable_on_numbers, + chat, } = settings; assert!(matches!(searchable_fields, Setting::NotSet)); assert!(matches!(displayed_fields, Setting::NotSet)); @@ -925,6 +926,7 @@ fn test_correct_settings_init() { assert!(matches!(prefix_search, Setting::NotSet)); assert!(matches!(facet_search, Setting::NotSet)); assert!(matches!(disable_on_numbers, Setting::NotSet)); + assert!(matches!(chat, Setting::NotSet)); }) .unwrap(); } diff --git a/crates/milli/src/update/upgrade/v1_15.rs b/crates/milli/src/update/upgrade/v1_15.rs index 2c3cff355..cea4783a1 100644 --- 
a/crates/milli/src/update/upgrade/v1_15.rs +++ b/crates/milli/src/update/upgrade/v1_15.rs @@ -3,7 +3,7 @@ use heed::RwTxn; use super::UpgradeIndex; use crate::progress::Progress; use crate::update::new::indexer::recompute_word_fst_from_word_docids_database; -use crate::{make_enum_progress, Index, Result}; +use crate::{Index, Result}; #[allow(non_camel_case_types)] pub(super) struct Latest_V1_14_To_Latest_V1_15(); @@ -17,14 +17,7 @@ impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 { progress: Progress, ) -> Result { // Recompute the word FST from the word docids database. - make_enum_progress! { - enum TypoTolerance { - RecomputeWordFst, - } - }; - - progress.update_progress(TypoTolerance::RecomputeWordFst); - recompute_word_fst_from_word_docids_database(index, wtxn)?; + recompute_word_fst_from_word_docids_database(index, wtxn, &progress)?; Ok(false) } diff --git a/crates/milli/src/vector/composite.rs b/crates/milli/src/vector/composite.rs index 9c5992bd3..8314b8649 100644 --- a/crates/milli/src/vector/composite.rs +++ b/crates/milli/src/vector/composite.rs @@ -7,6 +7,7 @@ use super::{ hf, manual, ollama, openai, rest, DistributionShift, EmbedError, Embedding, EmbeddingCache, NewEmbedderError, }; +use crate::progress::EmbedderStats; use crate::ThreadPoolNoAbort; #[derive(Debug)] @@ -81,6 +82,7 @@ impl Embedder { "This is a sample text. It is meant to compare similarity.".into(), ], None, + None, ) .map_err(|error| NewEmbedderError::composite_test_embedding_failed(error, "search"))?; @@ -92,6 +94,7 @@ impl Embedder { "This is a sample text. It is meant to compare similarity.".into(), ], None, + None, ) .map_err(|error| { NewEmbedderError::composite_test_embedding_failed(error, "indexing") @@ -150,13 +153,14 @@ impl SubEmbedder { &self, texts: Vec, deadline: Option, + embedder_stats: Option<&EmbedderStats>, ) -> std::result::Result, EmbedError> { match self { SubEmbedder::HuggingFace(embedder) => embedder.embed(texts), - SubEmbedder::OpenAi(embedder) => embedder.embed(&texts, deadline), - SubEmbedder::Ollama(embedder) => embedder.embed(&texts, deadline), + SubEmbedder::OpenAi(embedder) => embedder.embed(&texts, deadline, embedder_stats), + SubEmbedder::Ollama(embedder) => embedder.embed(&texts, deadline, embedder_stats), SubEmbedder::UserProvided(embedder) => embedder.embed(&texts), - SubEmbedder::Rest(embedder) => embedder.embed(texts, deadline), + SubEmbedder::Rest(embedder) => embedder.embed(texts, deadline, embedder_stats), } } @@ -164,18 +168,21 @@ impl SubEmbedder { &self, text: &str, deadline: Option, + embedder_stats: Option<&EmbedderStats>, ) -> std::result::Result { match self { SubEmbedder::HuggingFace(embedder) => embedder.embed_one(text), - SubEmbedder::OpenAi(embedder) => { - embedder.embed(&[text], deadline)?.pop().ok_or_else(EmbedError::missing_embedding) - } - SubEmbedder::Ollama(embedder) => { - embedder.embed(&[text], deadline)?.pop().ok_or_else(EmbedError::missing_embedding) - } + SubEmbedder::OpenAi(embedder) => embedder + .embed(&[text], deadline, embedder_stats)? + .pop() + .ok_or_else(EmbedError::missing_embedding), + SubEmbedder::Ollama(embedder) => embedder + .embed(&[text], deadline, embedder_stats)? + .pop() + .ok_or_else(EmbedError::missing_embedding), SubEmbedder::UserProvided(embedder) => embedder.embed_one(text), SubEmbedder::Rest(embedder) => embedder - .embed_ref(&[text], deadline)? + .embed_ref(&[text], deadline, embedder_stats)? 
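
The recurring change in this file is the extra `embedder_stats` parameter threaded through every `embed*` call. A minimal sketch of the pattern, assuming a simple request counter (the real `EmbedderStats` lives in `crate::progress` and its exact fields are not shown in this diff):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

// Illustrative stand-in for the stats handle passed as `Option<&EmbedderStats>`.
#[derive(Default)]
struct Stats {
    total_requests: AtomicUsize,
}

fn embed(texts: &[&str], stats: Option<&Stats>) -> Vec<Vec<f32>> {
    // Callers that don't care simply pass `None`, mirroring the new parameter
    // added to `SubEmbedder::embed` above.
    if let Some(stats) = stats {
        stats.total_requests.fetch_add(1, Ordering::Relaxed);
    }
    texts.iter().map(|_| vec![0.0; 3]).collect() // dummy embeddings
}

fn main() {
    let stats = Stats::default();
    embed(&["hello"], Some(&stats));
    embed(&["world"], None);
    assert_eq!(stats.total_requests.load(Ordering::Relaxed), 1);
}
```
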
.pop() .ok_or_else(EmbedError::missing_embedding), } @@ -188,13 +195,20 @@ impl SubEmbedder { &self, text_chunks: Vec>, threads: &ThreadPoolNoAbort, + embedder_stats: &EmbedderStats, ) -> std::result::Result>, EmbedError> { match self { SubEmbedder::HuggingFace(embedder) => embedder.embed_index(text_chunks), - SubEmbedder::OpenAi(embedder) => embedder.embed_index(text_chunks, threads), - SubEmbedder::Ollama(embedder) => embedder.embed_index(text_chunks, threads), + SubEmbedder::OpenAi(embedder) => { + embedder.embed_index(text_chunks, threads, embedder_stats) + } + SubEmbedder::Ollama(embedder) => { + embedder.embed_index(text_chunks, threads, embedder_stats) + } SubEmbedder::UserProvided(embedder) => embedder.embed_index(text_chunks), - SubEmbedder::Rest(embedder) => embedder.embed_index(text_chunks, threads), + SubEmbedder::Rest(embedder) => { + embedder.embed_index(text_chunks, threads, embedder_stats) + } } } @@ -203,13 +217,18 @@ impl SubEmbedder { &self, texts: &[&str], threads: &ThreadPoolNoAbort, + embedder_stats: &EmbedderStats, ) -> std::result::Result, EmbedError> { match self { SubEmbedder::HuggingFace(embedder) => embedder.embed_index_ref(texts), - SubEmbedder::OpenAi(embedder) => embedder.embed_index_ref(texts, threads), - SubEmbedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads), + SubEmbedder::OpenAi(embedder) => { + embedder.embed_index_ref(texts, threads, embedder_stats) + } + SubEmbedder::Ollama(embedder) => { + embedder.embed_index_ref(texts, threads, embedder_stats) + } SubEmbedder::UserProvided(embedder) => embedder.embed_index_ref(texts), - SubEmbedder::Rest(embedder) => embedder.embed_index_ref(texts, threads), + SubEmbedder::Rest(embedder) => embedder.embed_index_ref(texts, threads, embedder_stats), } } diff --git a/crates/milli/src/vector/db.rs b/crates/milli/src/vector/db.rs new file mode 100644 index 000000000..0e890fac9 --- /dev/null +++ b/crates/milli/src/vector/db.rs @@ -0,0 +1,443 @@ +//! 
Module containing types and methods to store meta-information about the embedders and fragments + +use std::borrow::Cow; + +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use heed::types::{SerdeJson, Str, U8}; +use heed::{BytesEncode, Database, RoTxn, RwTxn, Unspecified}; +use roaring::RoaringBitmap; +use serde::{Deserialize, Serialize}; + +use crate::vector::settings::RemoveFragments; +use crate::vector::EmbeddingConfig; +use crate::{CboRoaringBitmapCodec, DocumentId, UserError}; + +#[derive(Debug, Deserialize, Serialize)] +pub struct IndexEmbeddingConfig { + pub name: String, + pub config: EmbeddingConfig, + #[serde(default)] + pub fragments: FragmentConfigs, +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct FragmentConfigs(Vec); + +impl FragmentConfigs { + pub fn new() -> Self { + Default::default() + } + pub fn as_slice(&self) -> &[FragmentConfig] { + self.0.as_slice() + } + + pub fn into_inner(self) -> Vec { + self.0 + } + + pub fn remove_fragments<'a>( + &mut self, + fragments: impl IntoIterator, + ) -> Option { + let mut remove_fragments = Vec::new(); + for fragment in fragments { + let Ok(index_to_remove) = self.0.binary_search_by_key(&fragment, |f| &f.name) else { + continue; + }; + let fragment = self.0.swap_remove(index_to_remove); + remove_fragments.push(fragment.id); + } + (!remove_fragments.is_empty()).then_some(RemoveFragments { fragment_ids: remove_fragments }) + } + + pub fn add_new_fragments( + &mut self, + new_fragments: impl IntoIterator, + ) -> crate::Result<()> { + let mut free_indices: [bool; u8::MAX as usize] = [true; u8::MAX as usize]; + + for FragmentConfig { id, name: _ } in self.0.iter() { + free_indices[*id as usize] = false; + } + let mut free_indices = free_indices.iter_mut().enumerate(); + let mut find_free_index = + move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); + + let mut new_fragments = new_fragments.into_iter(); + + for name in &mut new_fragments { + let id = match find_free_index() { + Some(id) => id, + None => { + let more = (&mut new_fragments).count(); + return Err(UserError::TooManyFragments(u8::MAX as usize + more + 1).into()); + } + }; + self.0.push(FragmentConfig { id, name }); + } + Ok(()) + } +} + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct FragmentConfig { + pub id: u8, + pub name: String, +} + +pub struct IndexEmbeddingConfigs { + main: Database, + embedder_info: Database, +} + +pub struct EmbedderInfo { + pub embedder_id: u8, + pub embedding_status: EmbeddingStatus, +} + +impl EmbedderInfo { + pub fn to_bytes(&self) -> Result, heed::BoxedError> { + EmbedderInfoCodec::bytes_encode(self) + } +} + +/// Optimized struct to hold the list of documents that are `user_provided` and `must_regenerate`. +/// +/// Because most documents have the same value for `user_provided` and `must_regenerate`, we store only +/// the `user_provided` and a list of the documents for which `must_regenerate` assumes the other value +/// than `user_provided`. +#[derive(Default)] +pub struct EmbeddingStatus { + user_provided: RoaringBitmap, + skip_regenerate_different_from_user_provided: RoaringBitmap, +} + +impl EmbeddingStatus { + pub fn new() -> Self { + Default::default() + } + + /// Whether the document contains user-provided vectors for that embedder. + pub fn is_user_provided(&self, docid: DocumentId) -> bool { + self.user_provided.contains(docid) + } + /// Whether vectors should be regenerated for that document and that embedder. 
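
A standalone sketch of the two-bitmap encoding documented above (toy struct, same XOR trick as the `must_regenerate` accessor): the common case is "user-provided docs are not regenerated, generated docs are", so only the exceptions to that rule cost space in the second bitmap.

```rust
use roaring::RoaringBitmap;

#[derive(Default)]
struct Status {
    user_provided: RoaringBitmap,
    // docids whose `skip_regenerate` flag differs from their `user_provided` flag
    invert: RoaringBitmap,
}

impl Status {
    fn set(&mut self, docid: u32, user_provided: bool, must_regenerate: bool) {
        if user_provided {
            self.user_provided.insert(docid);
        }
        // skip_regenerate == !must_regenerate; store the docid only when it
        // disagrees with user_provided.
        if user_provided != !must_regenerate {
            self.invert.insert(docid);
        }
    }

    fn must_regenerate(&self, docid: u32) -> bool {
        let invert = self.invert.contains(docid);
        let user_provided = self.user_provided.contains(docid);
        !(user_provided ^ invert)
    }
}

fn main() {
    let mut status = Status::default();
    status.set(0, true, false); // typical user-provided doc: nothing stored in `invert`
    status.set(1, false, true); // typical generated doc: nothing stored in `invert`
    status.set(2, true, true); // exception: user-provided but still regenerated
    assert!(!status.must_regenerate(0));
    assert!(status.must_regenerate(1));
    assert!(status.must_regenerate(2));
    assert_eq!(status.invert.len(), 1); // only the exception costs space
}
```
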
+ pub fn must_regenerate(&self, docid: DocumentId) -> bool { + let invert = self.skip_regenerate_different_from_user_provided.contains(docid); + let user_provided = self.user_provided.contains(docid); + !(user_provided ^ invert) + } + + pub fn is_user_provided_must_regenerate(&self, docid: DocumentId) -> (bool, bool) { + let invert = self.skip_regenerate_different_from_user_provided.contains(docid); + let user_provided = self.user_provided.contains(docid); + (user_provided, !(user_provided ^ invert)) + } + + pub fn user_provided_docids(&self) -> &RoaringBitmap { + &self.user_provided + } + + pub fn skip_regenerate_docids(&self) -> RoaringBitmap { + &self.user_provided ^ &self.skip_regenerate_different_from_user_provided + } + + pub(crate) fn into_user_provided(self) -> RoaringBitmap { + self.user_provided + } +} + +#[derive(Default)] +pub struct EmbeddingStatusDelta { + del_status: EmbeddingStatus, + add_status: EmbeddingStatus, +} + +impl EmbeddingStatusDelta { + pub fn new() -> Self { + Self::default() + } + + pub fn needs_change( + old_is_user_provided: bool, + old_must_regenerate: bool, + new_is_user_provided: bool, + new_must_regenerate: bool, + ) -> bool { + let old_skip_regenerate_different_user_provided = + old_is_user_provided == old_must_regenerate; + let new_skip_regenerate_different_user_provided = + new_is_user_provided == new_must_regenerate; + + old_is_user_provided != new_is_user_provided + || old_skip_regenerate_different_user_provided + != new_skip_regenerate_different_user_provided + } + + pub fn needs_clear(is_user_provided: bool, must_regenerate: bool) -> bool { + Self::needs_change(is_user_provided, must_regenerate, false, true) + } + + pub fn clear_docid( + &mut self, + docid: DocumentId, + is_user_provided: bool, + must_regenerate: bool, + ) { + self.push_delta(docid, is_user_provided, must_regenerate, false, true); + } + + pub fn push_delta( + &mut self, + docid: DocumentId, + old_is_user_provided: bool, + old_must_regenerate: bool, + new_is_user_provided: bool, + new_must_regenerate: bool, + ) { + // must_regenerate == !skip_regenerate + let old_skip_regenerate_different_user_provided = + old_is_user_provided == old_must_regenerate; + let new_skip_regenerate_different_user_provided = + new_is_user_provided == new_must_regenerate; + + match (old_is_user_provided, new_is_user_provided) { + (true, true) | (false, false) => { /* no change */ } + (true, false) => { + self.del_status.user_provided.insert(docid); + } + (false, true) => { + self.add_status.user_provided.insert(docid); + } + } + + match ( + old_skip_regenerate_different_user_provided, + new_skip_regenerate_different_user_provided, + ) { + (true, true) | (false, false) => { /* no change */ } + (true, false) => { + self.del_status.skip_regenerate_different_from_user_provided.insert(docid); + } + (false, true) => { + self.add_status.skip_regenerate_different_from_user_provided.insert(docid); + } + } + } + + pub fn push_new(&mut self, docid: DocumentId, is_user_provided: bool, must_regenerate: bool) { + self.push_delta( + docid, + !is_user_provided, + !must_regenerate, + is_user_provided, + must_regenerate, + ); + } + + pub fn apply_to(&self, status: &mut EmbeddingStatus) { + status.user_provided -= &self.del_status.user_provided; + status.user_provided |= &self.add_status.user_provided; + + status.skip_regenerate_different_from_user_provided -= + &self.del_status.skip_regenerate_different_from_user_provided; + status.skip_regenerate_different_from_user_provided |= + 
&self.add_status.skip_regenerate_different_from_user_provided; + } +} + +struct EmbedderInfoCodec; + +impl<'a> heed::BytesDecode<'a> for EmbedderInfoCodec { + type DItem = EmbedderInfo; + + fn bytes_decode(mut bytes: &'a [u8]) -> Result<Self::DItem, heed::BoxedError> { + let embedder_id = bytes.read_u8()?; + // Support all versions that didn't store the embedding status + if bytes.is_empty() { + return Ok(EmbedderInfo { embedder_id, embedding_status: EmbeddingStatus::new() }); + } + let first_bitmap_size = bytes.read_u32::<BigEndian>()?; + let first_bitmap_bytes = &bytes[..first_bitmap_size as usize]; + let user_provided = CboRoaringBitmapCodec::bytes_decode(first_bitmap_bytes)?; + let skip_regenerate_different_from_user_provided = + CboRoaringBitmapCodec::bytes_decode(&bytes[first_bitmap_size as usize..])?; + Ok(EmbedderInfo { + embedder_id, + embedding_status: EmbeddingStatus { + user_provided, + skip_regenerate_different_from_user_provided, + }, + }) + } +} + +impl<'a> heed::BytesEncode<'a> for EmbedderInfoCodec { + type EItem = EmbedderInfo; + + fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, heed::BoxedError> { + let first_bitmap_size = + CboRoaringBitmapCodec::serialized_size(&item.embedding_status.user_provided); + let second_bitmap_size = CboRoaringBitmapCodec::serialized_size( + &item.embedding_status.skip_regenerate_different_from_user_provided, + ); + + let mut bytes = Vec::with_capacity(1 + 4 + first_bitmap_size + second_bitmap_size); + bytes.write_u8(item.embedder_id)?; + bytes.write_u32::<BigEndian>(first_bitmap_size.try_into()?)?; + CboRoaringBitmapCodec::serialize_into_writer( + &item.embedding_status.user_provided, + &mut bytes, + )?; + CboRoaringBitmapCodec::serialize_into_writer( + &item.embedding_status.skip_regenerate_different_from_user_provided, + &mut bytes, + )?; + Ok(bytes.into()) + } +} + +impl IndexEmbeddingConfigs { + pub(crate) fn new( + main: Database<Unspecified, Unspecified>, + embedder_info: Database<Unspecified, Unspecified>, + ) -> Self { + Self { main, embedder_info: embedder_info.remap_types() } + } + + pub(crate) fn put_embedding_configs( + &self, + wtxn: &mut RwTxn<'_>, + configs: Vec<IndexEmbeddingConfig>, + ) -> heed::Result<()> { + self.main.remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>().put( + wtxn, + crate::index::main_key::EMBEDDING_CONFIGS, + &configs, + ) + } + + pub(crate) fn delete_embedding_configs(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> { + self.main.remap_key_type::<Str>().delete(wtxn, crate::index::main_key::EMBEDDING_CONFIGS) + } + + pub fn embedding_configs(&self, rtxn: &RoTxn<'_>) -> heed::Result<Vec<IndexEmbeddingConfig>> { + Ok(self + .main + .remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>() + .get(rtxn, crate::index::main_key::EMBEDDING_CONFIGS)? + .unwrap_or_default()) + } + + pub fn embedder_id(&self, rtxn: &RoTxn<'_>, name: &str) -> heed::Result<Option<u8>> { + self.embedder_info.remap_data_type::<U8>().get(rtxn, name) + } + + pub fn put_fresh_embedder_id( + &self, + wtxn: &mut RwTxn<'_>, + name: &str, + embedder_id: u8, + ) -> heed::Result<()> { + let info = EmbedderInfo { embedder_id, embedding_status: EmbeddingStatus::new() }; + self.put_embedder_info(wtxn, name, &info) + } + + /// Iterate through the passed list of embedder names, associating a fresh embedder id to any new names. + /// + /// Passing the name of a currently existing embedder is not an error, and will not modify its embedder id, + /// so it is not necessary to differentiate between new and existing embedders before calling this function. 
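
A self-contained sketch of the byte layout the codec above reads and writes, `[embedder_id: u8][first bitmap length: u32 BE][user_provided][skip_regenerate…]`, using plain roaring serialization instead of `CboRoaringBitmapCodec` so it stays runnable:

```rust
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use roaring::RoaringBitmap;

fn encode(id: u8, user_provided: &RoaringBitmap, invert: &RoaringBitmap) -> std::io::Result<Vec<u8>> {
    let mut bytes = Vec::new();
    bytes.write_u8(id)?;
    // Length prefix only for the first bitmap; the second runs to the end of the value.
    bytes.write_u32::<BigEndian>(user_provided.serialized_size() as u32)?;
    user_provided.serialize_into(&mut bytes)?;
    invert.serialize_into(&mut bytes)?;
    Ok(bytes)
}

fn decode(mut bytes: &[u8]) -> std::io::Result<(u8, RoaringBitmap, RoaringBitmap)> {
    let id = bytes.read_u8()?;
    if bytes.is_empty() {
        // older entries stored only the id; decode as an empty status, like the codec above
        return Ok((id, RoaringBitmap::new(), RoaringBitmap::new()));
    }
    let len = bytes.read_u32::<BigEndian>()? as usize;
    let user_provided = RoaringBitmap::deserialize_from(&bytes[..len])?;
    let invert = RoaringBitmap::deserialize_from(&bytes[len..])?;
    Ok((id, user_provided, invert))
}

fn main() -> std::io::Result<()> {
    let user_provided: RoaringBitmap = (0u32..3).collect();
    let invert = RoaringBitmap::new();
    let bytes = encode(7, &user_provided, &invert)?;
    assert_eq!(decode(&bytes)?, (7, user_provided, invert));
    Ok(())
}
```
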
+ pub fn add_new_embedders<'a>( + &self, + wtxn: &mut RwTxn<'_>, + embedder_names: impl IntoIterator, + total_embedder_count: usize, + ) -> crate::Result<()> { + let mut free_indices: [bool; u8::MAX as usize] = [true; u8::MAX as usize]; + + for res in self.embedder_info.iter(wtxn)? { + let (_name, EmbedderInfo { embedder_id, embedding_status: _ }) = res?; + free_indices[embedder_id as usize] = false; + } + + let mut free_indices = free_indices.iter_mut().enumerate(); + let mut find_free_index = + move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); + + for embedder_name in embedder_names { + if self.embedder_id(wtxn, embedder_name)?.is_some() { + continue; + } + let embedder_id = find_free_index() + .ok_or(crate::UserError::TooManyEmbedders(total_embedder_count))?; + tracing::debug!( + embedder = embedder_name, + embedder_id, + "assigning free id to new embedder" + ); + self.put_fresh_embedder_id(wtxn, embedder_name, embedder_id)?; + } + Ok(()) + } + + pub fn embedder_info( + &self, + rtxn: &RoTxn<'_>, + name: &str, + ) -> heed::Result> { + self.embedder_info.get(rtxn, name) + } + + /// Clear the list of docids that are `user_provided` or `must_regenerate` across all embedders. + pub fn clear_embedder_info_docids(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<()> { + let mut it = self.embedder_info.iter_mut(wtxn)?; + while let Some(res) = it.next() { + let (embedder_name, info) = res?; + let embedder_name = embedder_name.to_owned(); + // SAFETY: we copied the `embedder_name` so are not using the reference while using put + unsafe { + it.put_current( + &embedder_name, + &EmbedderInfo { + embedder_id: info.embedder_id, + embedding_status: EmbeddingStatus::new(), + }, + )?; + } + } + Ok(()) + } + + pub fn iter_embedder_info<'a>( + &self, + rtxn: &'a RoTxn<'_>, + ) -> heed::Result>> { + self.embedder_info.iter(rtxn) + } + + pub fn iter_embedder_id<'a>( + &self, + rtxn: &'a RoTxn<'_>, + ) -> heed::Result>> { + self.embedder_info.remap_data_type::().iter(rtxn) + } + + pub fn remove_embedder( + &self, + wtxn: &mut RwTxn<'_>, + name: &str, + ) -> heed::Result> { + let info = self.embedder_info.get(wtxn, name)?; + self.embedder_info.delete(wtxn, name)?; + Ok(info) + } + + pub fn put_embedder_info( + &self, + wtxn: &mut RwTxn<'_>, + name: &str, + info: &EmbedderInfo, + ) -> heed::Result<()> { + self.embedder_info.put(wtxn, name, info) + } +} diff --git a/crates/milli/src/vector/error.rs b/crates/milli/src/vector/error.rs index 685022de8..0d737cbfc 100644 --- a/crates/milli/src/vector/error.rs +++ b/crates/milli/src/vector/error.rs @@ -3,6 +3,7 @@ use std::path::PathBuf; use bumpalo::Bump; use hf_hub::api::sync::ApiError; +use itertools::Itertools as _; use super::parsed_vectors::ParsedVectorsDiff; use super::rest::ConfigurationSource; @@ -101,6 +102,32 @@ pub enum EmbedErrorKind { MissingEmbedding, #[error(transparent)] PanicInThreadPool(#[from] PanicCatched), + #[error("`media` requested but the configuration doesn't have source `rest`")] + RestMediaNotARest, + #[error("`media` requested, and the configuration has source `rest`, but the configuration doesn't have `searchFragments`.")] + RestMediaNotAFragment, + + #[error("Query matches multiple search fragments.\n - Note: First matched fragment `{name}`.\n - Note: Second matched fragment `{second_name}`.\n - Note: {}", + { + serde_json::json!({ + "q": q, + "media": media + }) + })] + RestSearchMatchesMultipleFragments { + name: String, + second_name: String, + q: Option, + media: Option, + }, + #[error("Query matches no search 
fragment.\n - Note: {}", + { + serde_json::json!({ + "q": q, + "media": media + }) + })] + RestSearchMatchesNoFragment { q: Option, media: Option }, } fn option_info(info: Option<&str>, prefix: &str) -> String { @@ -210,6 +237,44 @@ impl EmbedError { pub(crate) fn rest_extraction_error(error: String) -> EmbedError { Self { kind: EmbedErrorKind::RestExtractionError(error), fault: FaultSource::Runtime } } + + pub(crate) fn rest_media_not_a_rest() -> EmbedError { + Self { kind: EmbedErrorKind::RestMediaNotARest, fault: FaultSource::User } + } + + pub(crate) fn rest_media_not_a_fragment() -> EmbedError { + Self { kind: EmbedErrorKind::RestMediaNotAFragment, fault: FaultSource::User } + } + + pub(crate) fn rest_search_matches_multiple_fragments( + name: &str, + second_name: &str, + q: Option<&str>, + media: Option<&serde_json::Value>, + ) -> EmbedError { + Self { + kind: EmbedErrorKind::RestSearchMatchesMultipleFragments { + name: name.to_string(), + second_name: second_name.to_string(), + q: q.map(String::from), + media: media.cloned(), + }, + fault: FaultSource::User, + } + } + + pub(crate) fn rest_search_matches_no_fragment( + q: Option<&str>, + media: Option<&serde_json::Value>, + ) -> EmbedError { + Self { + kind: EmbedErrorKind::RestSearchMatchesNoFragment { + q: q.map(String::from), + media: media.cloned(), + }, + fault: FaultSource::User, + } + } } #[derive(Debug, thiserror::Error)] @@ -382,6 +447,49 @@ impl NewEmbedderError { fault: FaultSource::User, } } + + pub(crate) fn rest_cannot_infer_dimensions_for_fragment() -> NewEmbedderError { + Self { + kind: NewEmbedderErrorKind::RestCannotInferDimensionsForFragment, + fault: FaultSource::User, + } + } + + pub(crate) fn rest_inconsistent_fragments( + indexing_fragments_is_empty: bool, + indexing_fragments: BTreeMap, + search_fragments: BTreeMap, + ) -> NewEmbedderError { + let message = if indexing_fragments_is_empty { + format!("`indexingFragments` is empty, but `searchFragments` declares {} fragments: {}{}\n - Hint: declare at least one fragment in `indexingFragments` or remove fragments from `searchFragments` by setting them to `null`", + search_fragments.len(), + search_fragments.keys().take(3).join(", "), if search_fragments.len() > 3 { ", ..." } else { "" } + ) + } else { + format!("`searchFragments` is empty, but `indexingFragments` declares {} fragments: {}{}\n - Hint: declare at least one fragment in `searchFragments` or remove fragments from `indexingFragments` by setting them to `null`", + indexing_fragments.len(), + indexing_fragments.keys().take(3).join(", "), if indexing_fragments.len() > 3 { ", ..." 
} else { "" } ) }; + + Self { + kind: NewEmbedderErrorKind::RestInconsistentFragments { message }, + fault: FaultSource::User, + } + } + + pub(crate) fn rest_document_template_and_fragments( + indexing_fragments_len: usize, + search_fragments_len: usize, + ) -> Self { + Self { + kind: NewEmbedderErrorKind::RestDocumentTemplateAndFragments { + indexing_fragments_len, + search_fragments_len, + }, + fault: FaultSource::User, + } + } } #[derive(Debug, Clone, Copy)] @@ -499,6 +607,12 @@ pub enum NewEmbedderErrorKind { CompositeEmbeddingCountMismatch { search_count: usize, index_count: usize }, #[error("error while generating test embeddings.\n - the embeddings produced at search time and indexing time are not similar enough.\n - angular distance {distance:.2}\n - Meilisearch requires a maximum distance of {MAX_COMPOSITE_DISTANCE}.\n - Note: check that both embedders produce similar embeddings.{hint}")] CompositeEmbeddingValueMismatch { distance: f32, hint: CompositeEmbedderContainsHuggingFace }, + #[error("cannot infer `dimensions` for an embedder using `indexingFragments`.\n - Note: Specify `dimensions` explicitly or don't use `indexingFragments`.")] + RestCannotInferDimensionsForFragment, + #[error("inconsistent fragments: {message}")] + RestInconsistentFragments { message: String }, + #[error("cannot pass both fragments and a document template.\n - Note: {indexing_fragments_len} fragments declared in `indexingFragments` and {search_fragments_len} fragments declared in `searchFragments`.\n - Hint: remove the declared fragments or remove the `documentTemplate`")] + RestDocumentTemplateAndFragments { indexing_fragments_len: usize, search_fragments_len: usize }, } pub struct PossibleEmbeddingMistakes { diff --git a/crates/milli/src/vector/extractor.rs b/crates/milli/src/vector/extractor.rs new file mode 100644 index 000000000..2ab541ac1 --- /dev/null +++ b/crates/milli/src/vector/extractor.rs @@ -0,0 +1,244 @@ +use std::cell::RefCell; +use std::collections::BTreeMap; +use std::fmt::Debug; + +use bumpalo::Bump; +use serde_json::Value; + +use super::json_template::{self, JsonTemplate}; +use crate::prompt::error::RenderPromptError; +use crate::prompt::Prompt; +use crate::update::new::document::Document; +use crate::vector::RuntimeFragment; +use crate::GlobalFieldsIdsMap; + +/// Trait for types that extract embedder inputs from a document. +/// +/// An embedder input can then be sent to an embedder by using an [`super::session::EmbedSession`]. +pub trait Extractor<'doc> { + /// The embedder input that is extracted from documents by this extractor. + /// + /// The inputs have to be comparable for equality so that diffing is possible. + type Input: PartialEq; + + /// The error that can happen while extracting from a document. + type Error; + + /// Metadata associated with a document. + type DocumentMetadata; + + /// Extract the embedder input from a document and its metadata. + fn extract<'a, D: Document<'a> + Debug>( + &self, + doc: D, + meta: &Self::DocumentMetadata, + ) -> Result<Option<Self::Input>, Self::Error>; + + /// Unique `id` associated with this extractor. + /// + /// This will serve to decide where to store the vectors in the vector store. + /// The id should be stable for a given extractor. + fn extractor_id(&self) -> u8; + + /// The result of diffing the embedder inputs extracted from two versions of a document. 
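
The diffing contract described here reduces to the `to_diff` table that follows; a toy, string-based rendition (plain strings stand in for milli's `Document` type, which the real trait is generic over):

```rust
#[derive(PartialEq, Debug)]
enum Diff<T> {
    Removed,
    Added(T),
    Updated(T),
    Unchanged,
}

// Same decision table as `to_diff` below, minus error handling.
fn diff<T: PartialEq>(old: Option<T>, new: Option<T>) -> Diff<T> {
    match (old, new) {
        (Some(old), Some(new)) if old == new => Diff::Unchanged,
        (None, None) => Diff::Unchanged,
        (None, Some(input)) => Diff::Added(input),
        (Some(_), None) => Diff::Removed,
        (Some(_), Some(input)) => Diff::Updated(input),
    }
}

fn main() {
    // Only changed inputs trigger a new embedding request.
    assert_eq!(diff(Some("a title"), Some("a title")), Diff::Unchanged);
    assert_eq!(diff(Some("a title"), Some("new title")), Diff::Updated("new title"));
    assert_eq!(diff(None, Some("fresh doc")), Diff::Added("fresh doc"));
    assert_eq!(diff(Some("gone"), None::<&str>), Diff::Removed);
}
```
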
+ /// + /// # Parameters + /// + /// - `old`: old version of the document + /// - `new`: new version of the document + /// - `meta`: metadata associated to the document + fn diff_documents<'a, OD: Document<'a> + Debug, ND: Document<'a> + Debug>( + &self, + old: OD, + new: ND, + meta: &Self::DocumentMetadata, + ) -> Result, Self::Error> + where + 'doc: 'a, + { + let old_input = self.extract(old, meta); + let new_input = self.extract(new, meta); + to_diff(old_input, new_input) + } + + /// The result of diffing the embedder inputs extracted from a document by two versions of this extractor. + /// + /// # Parameters + /// + /// - `doc`: the document from which to extract the embedder inputs + /// - `meta`: metadata associated to the document + /// - `old`: If `Some`, the old version of this extractor. If `None`, this is equivalent to calling `ExtractorDiff::Added(self.extract(_))`. + fn diff_settings<'a, D: Document<'a> + Debug>( + &self, + doc: D, + meta: &Self::DocumentMetadata, + old: Option<&Self>, + ) -> Result, Self::Error> { + let old_input = if let Some(old) = old { old.extract(&doc, meta) } else { Ok(None) }; + let new_input = self.extract(&doc, meta); + + to_diff(old_input, new_input) + } + + /// Returns an extractor wrapping `self` and set to ignore all errors arising from extracting with this extractor. + fn ignore_errors(self) -> IgnoreErrorExtractor + where + Self: Sized, + { + IgnoreErrorExtractor(self) + } +} + +fn to_diff( + old_input: Result, E>, + new_input: Result, E>, +) -> Result, E> { + let old_input = old_input.ok().unwrap_or(None); + let new_input = new_input?; + Ok(match (old_input, new_input) { + (Some(old), Some(new)) if old == new => ExtractorDiff::Unchanged, + (None, None) => ExtractorDiff::Unchanged, + (None, Some(input)) => ExtractorDiff::Added(input), + (Some(_), None) => ExtractorDiff::Removed, + (Some(_), Some(input)) => ExtractorDiff::Updated(input), + }) +} + +pub enum ExtractorDiff { + Removed, + Added(Input), + Updated(Input), + Unchanged, +} + +impl ExtractorDiff { + pub fn into_input(self) -> Option { + match self { + ExtractorDiff::Removed => None, + ExtractorDiff::Added(input) => Some(input), + ExtractorDiff::Updated(input) => Some(input), + ExtractorDiff::Unchanged => None, + } + } + + pub fn needs_change(&self) -> bool { + match self { + ExtractorDiff::Removed => true, + ExtractorDiff::Added(_) => true, + ExtractorDiff::Updated(_) => true, + ExtractorDiff::Unchanged => false, + } + } + + pub fn into_list_of_changes( + named_diffs: impl IntoIterator, + ) -> BTreeMap> { + named_diffs + .into_iter() + .filter(|(_, diff)| diff.needs_change()) + .map(|(name, diff)| (name, diff.into_input())) + .collect() + } +} + +pub struct DocumentTemplateExtractor<'a, 'b, 'c> { + doc_alloc: &'a Bump, + field_id_map: &'a RefCell>, + template: &'c Prompt, +} + +impl<'a, 'b, 'c> DocumentTemplateExtractor<'a, 'b, 'c> { + pub fn new( + template: &'c Prompt, + doc_alloc: &'a Bump, + field_id_map: &'a RefCell>, + ) -> Self { + Self { template, doc_alloc, field_id_map } + } +} + +impl<'doc> Extractor<'doc> for DocumentTemplateExtractor<'doc, '_, '_> { + type DocumentMetadata = &'doc str; + type Input = &'doc str; + type Error = RenderPromptError; + + fn extractor_id(&self) -> u8 { + 0 + } + + fn extract<'a, D: Document<'a> + Debug>( + &self, + doc: D, + external_docid: &Self::DocumentMetadata, + ) -> Result, Self::Error> { + Ok(Some(self.template.render_document( + external_docid, + doc, + self.field_id_map, + self.doc_alloc, + )?)) + } +} + +pub struct 
RequestFragmentExtractor<'a> { + fragment: &'a JsonTemplate, + extractor_id: u8, + doc_alloc: &'a Bump, +} + +impl<'a> RequestFragmentExtractor<'a> { + pub fn new(fragment: &'a RuntimeFragment, doc_alloc: &'a Bump) -> Self { + Self { fragment: &fragment.template, extractor_id: fragment.id, doc_alloc } + } +} + +impl<'doc> Extractor<'doc> for RequestFragmentExtractor<'doc> { + type DocumentMetadata = (); + type Input = Value; + type Error = json_template::Error; + + fn extractor_id(&self) -> u8 { + self.extractor_id + } + + fn extract<'a, D: Document<'a> + Debug>( + &self, + doc: D, + _meta: &Self::DocumentMetadata, + ) -> Result, Self::Error> { + Ok(Some(self.fragment.render_document(doc, self.doc_alloc)?)) + } +} + +pub struct IgnoreErrorExtractor(E); + +impl<'doc, E> Extractor<'doc> for IgnoreErrorExtractor +where + E: Extractor<'doc>, +{ + type DocumentMetadata = E::DocumentMetadata; + type Input = E::Input; + + type Error = Infallible; + + fn extractor_id(&self) -> u8 { + self.0.extractor_id() + } + + fn extract<'a, D: Document<'a> + Debug>( + &self, + doc: D, + meta: &Self::DocumentMetadata, + ) -> Result, Self::Error> { + Ok(self.0.extract(doc, meta).ok().flatten()) + } +} + +#[derive(Debug)] +pub enum Infallible {} + +impl From for crate::Error { + fn from(_: Infallible) -> Self { + unreachable!("Infallible values cannot be built") + } +} diff --git a/crates/milli/src/vector/json_template.rs b/crates/milli/src/vector/json_template/injectable_value.rs similarity index 84% rename from crates/milli/src/vector/json_template.rs rename to crates/milli/src/vector/json_template/injectable_value.rs index 179cbe9af..ec7d900db 100644 --- a/crates/milli/src/vector/json_template.rs +++ b/crates/milli/src/vector/json_template/injectable_value.rs @@ -1,20 +1,17 @@ -//! Module to manipulate JSON templates. +//! Module to manipulate JSON values containing placeholder strings. //! //! This module allows two main operations: -//! 1. Render JSON values from a template and a context value. -//! 2. Retrieve data from a template and JSON values. - -#![warn(rustdoc::broken_intra_doc_links)] -#![warn(missing_docs)] +//! 1. Render JSON values from a template value containing placeholders and a value to inject. +//! 2. Extract data from a template value containing placeholders and a concrete JSON value that fits the template value. use serde::Deserialize; use serde_json::{Map, Value}; -type ValuePath = Vec; +use super::{format_value, inject_value, path_with_root, PathComponent, ValuePath}; /// Encapsulates a JSON template and allows injecting and extracting values from it. #[derive(Debug)] -pub struct ValueTemplate { +pub struct InjectableValue { template: Value, value_kind: ValueKind, } @@ -32,34 +29,13 @@ struct ArrayPath { value_path_in_array: ValuePath, } -/// Component of a path to a Value -#[derive(Debug, Clone)] -pub enum PathComponent { - /// A key inside of an object - MapKey(String), - /// An index inside of an array - ArrayIndex(usize), -} - -impl PartialEq for PathComponent { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Self::MapKey(l0), Self::MapKey(r0)) => l0 == r0, - (Self::ArrayIndex(l0), Self::ArrayIndex(r0)) => l0 == r0, - _ => false, - } - } -} - -impl Eq for PathComponent {} - -/// Error that occurs when no few value was provided to a template for injection. +/// Error that occurs when no value was provided to a template for injection. 
#[derive(Debug)] pub struct MissingValue; -/// Error that occurs when trying to parse a template in [`ValueTemplate::new`] +/// Error that occurs when trying to parse a template in [`InjectableValue::new`] #[derive(Debug)] -pub enum TemplateParsingError { +pub enum InjectableParsingError { /// A repeat string appears inside a repeated value NestedRepeatString(ValuePath), /// A repeat string appears outside of an array @@ -85,42 +61,42 @@ pub enum TemplateParsingError { }, } -impl TemplateParsingError { +impl InjectableParsingError { /// Produce an error message from the error kind, the name of the root object, the placeholder string and the repeat string pub fn error_message(&self, root: &str, placeholder: &str, repeat: &str) -> String { match self { - TemplateParsingError::NestedRepeatString(path) => { + InjectableParsingError::NestedRepeatString(path) => { format!( r#"in {}: "{repeat}" appears nested inside of a value that is itself repeated"#, path_with_root(root, path) ) } - TemplateParsingError::RepeatStringNotInArray(path) => format!( + InjectableParsingError::RepeatStringNotInArray(path) => format!( r#"in {}: "{repeat}" appears outside of an array"#, path_with_root(root, path) ), - TemplateParsingError::BadIndexForRepeatString(path, index) => format!( + InjectableParsingError::BadIndexForRepeatString(path, index) => format!( r#"in {}: "{repeat}" expected at position #1, but found at position #{index}"#, path_with_root(root, path) ), - TemplateParsingError::MissingPlaceholderInRepeatedValue(path) => format!( + InjectableParsingError::MissingPlaceholderInRepeatedValue(path) => format!( r#"in {}: Expected "{placeholder}" inside of the repeated value"#, path_with_root(root, path) ), - TemplateParsingError::MultipleRepeatString(current, previous) => format!( + InjectableParsingError::MultipleRepeatString(current, previous) => format!( r#"in {}: Found "{repeat}", but it was already present in {}"#, path_with_root(root, current), path_with_root(root, previous) ), - TemplateParsingError::MultiplePlaceholderString(current, previous) => format!( + InjectableParsingError::MultiplePlaceholderString(current, previous) => format!( r#"in {}: Found "{placeholder}", but it was already present in {}"#, path_with_root(root, current), path_with_root(root, previous) ), - TemplateParsingError::MissingPlaceholderString => { + InjectableParsingError::MissingPlaceholderString => { format!(r#"in `{root}`: "{placeholder}" not found"#) } - TemplateParsingError::BothArrayAndSingle { + InjectableParsingError::BothArrayAndSingle { single_path, path_to_array, array_to_placeholder, @@ -140,41 +116,41 @@ impl TemplateParsingError { fn prepend_path(self, mut prepended_path: ValuePath) -> Self { match self { - TemplateParsingError::NestedRepeatString(mut path) => { + InjectableParsingError::NestedRepeatString(mut path) => { prepended_path.append(&mut path); - TemplateParsingError::NestedRepeatString(prepended_path) + InjectableParsingError::NestedRepeatString(prepended_path) } - TemplateParsingError::RepeatStringNotInArray(mut path) => { + InjectableParsingError::RepeatStringNotInArray(mut path) => { prepended_path.append(&mut path); - TemplateParsingError::RepeatStringNotInArray(prepended_path) + InjectableParsingError::RepeatStringNotInArray(prepended_path) } - TemplateParsingError::BadIndexForRepeatString(mut path, index) => { + InjectableParsingError::BadIndexForRepeatString(mut path, index) => { prepended_path.append(&mut path); - TemplateParsingError::BadIndexForRepeatString(prepended_path, index) + 
InjectableParsingError::BadIndexForRepeatString(prepended_path, index)
             }
-            TemplateParsingError::MissingPlaceholderInRepeatedValue(mut path) => {
+            InjectableParsingError::MissingPlaceholderInRepeatedValue(mut path) => {
                 prepended_path.append(&mut path);
-                TemplateParsingError::MissingPlaceholderInRepeatedValue(prepended_path)
+                InjectableParsingError::MissingPlaceholderInRepeatedValue(prepended_path)
             }
-            TemplateParsingError::MultipleRepeatString(mut path, older_path) => {
+            InjectableParsingError::MultipleRepeatString(mut path, older_path) => {
                 let older_prepended_path =
                     prepended_path.iter().cloned().chain(older_path).collect();
                 prepended_path.append(&mut path);
-                TemplateParsingError::MultipleRepeatString(prepended_path, older_prepended_path)
+                InjectableParsingError::MultipleRepeatString(prepended_path, older_prepended_path)
             }
-            TemplateParsingError::MultiplePlaceholderString(mut path, older_path) => {
+            InjectableParsingError::MultiplePlaceholderString(mut path, older_path) => {
                 let older_prepended_path =
                     prepended_path.iter().cloned().chain(older_path).collect();
                 prepended_path.append(&mut path);
-                TemplateParsingError::MultiplePlaceholderString(
+                InjectableParsingError::MultiplePlaceholderString(
                     prepended_path,
                     older_prepended_path,
                 )
             }
-            TemplateParsingError::MissingPlaceholderString => {
-                TemplateParsingError::MissingPlaceholderString
+            InjectableParsingError::MissingPlaceholderString => {
+                InjectableParsingError::MissingPlaceholderString
             }
-            TemplateParsingError::BothArrayAndSingle {
+            InjectableParsingError::BothArrayAndSingle {
                 single_path,
                 mut path_to_array,
                 array_to_placeholder,
@@ -184,7 +160,7 @@ impl TemplateParsingError {
                     prepended_path.iter().cloned().chain(single_path).collect();
                 prepended_path.append(&mut path_to_array);
                 // we don't prepend the array_to_placeholder path as it is the array path that is prepended
-                TemplateParsingError::BothArrayAndSingle {
+                InjectableParsingError::BothArrayAndSingle {
                     single_path: single_prepended_path,
                     path_to_array: prepended_path,
                     array_to_placeholder,
@@ -194,7 +170,7 @@ impl TemplateParsingError {
     }
 }

-/// Error that occurs when [`ValueTemplate::extract`] fails.
+/// Error that occurs when [`InjectableValue::extract`] fails.
 #[derive(Debug)]
 pub struct ExtractionError {
     /// The cause of the failure
@@ -336,27 +312,6 @@ enum LastNamedObject<'a> {
     NestedArrayInsideObject { object_name: &'a str, index: usize, nesting_level: usize },
 }

-/// Builds a string representation of a path, prepending the name of the root value.
-pub fn path_with_root<'a>(
-    root: &str,
-    path: impl IntoIterator<Item = &'a PathComponent> + 'a,
-) -> String {
-    use std::fmt::Write as _;
-    let mut res = format!("`{root}");
-    for component in path.into_iter() {
-        match component {
-            PathComponent::MapKey(key) => {
-                let _ = write!(&mut res, ".{key}");
-            }
-            PathComponent::ArrayIndex(index) => {
-                let _ = write!(&mut res, "[{index}]");
-            }
-        }
-    }
-    res.push('`');
-    res
-}
-
 /// Context where an extraction failure happened
 ///
 /// The operation that failed
@@ -405,7 +360,7 @@ enum ArrayParsingContext<'a> {
     NotNested(&'a mut Option<ArrayPath>),
 }

-impl ValueTemplate {
+impl InjectableValue {
     /// Prepare a template for injection or extraction.
     ///
     /// # Parameters
     ///
@@ -419,12 +374,12 @@ impl ValueTemplate {
     ///
     /// # Errors
     ///
-    /// - [`TemplateParsingError`]: refer to the documentation of this type
+    /// - [`InjectableParsingError`]: refer to the documentation of this type
     pub fn new(
         template: Value,
         placeholder_string: &str,
         repeat_string: &str,
-    ) -> Result<Self, TemplateParsingError> {
+    ) -> Result<Self, InjectableParsingError> {
         let mut value_path = None;
         let mut array_path = None;
         let mut current_path = Vec::new();
@@ -438,11 +393,11 @@ impl ValueTemplate {
         )?;

         let value_kind = match (array_path, value_path) {
-            (None, None) => return Err(TemplateParsingError::MissingPlaceholderString),
+            (None, None) => return Err(InjectableParsingError::MissingPlaceholderString),
             (None, Some(value_path)) => ValueKind::Single(value_path),
             (Some(array_path), None) => ValueKind::Array(array_path),
             (Some(array_path), Some(value_path)) => {
-                return Err(TemplateParsingError::BothArrayAndSingle {
+                return Err(InjectableParsingError::BothArrayAndSingle {
                     single_path: value_path,
                     path_to_array: array_path.path_to_array,
                     array_to_placeholder: array_path.value_path_in_array,
@@ -564,29 +519,29 @@ impl ValueTemplate {
         value_path: &mut Option<ValuePath>,
         mut array_path: &mut ArrayParsingContext,
         current_path: &mut ValuePath,
-    ) -> Result<(), TemplateParsingError> {
+    ) -> Result<(), InjectableParsingError> {
         // two modes for parsing array.
         match array {
             // 1. array contains a repeat string in second position
             [first, second, rest @ ..] if second == repeat_string => {
                 let ArrayParsingContext::NotNested(array_path) = &mut array_path else {
-                    return Err(TemplateParsingError::NestedRepeatString(current_path.clone()));
+                    return Err(InjectableParsingError::NestedRepeatString(current_path.clone()));
                 };
                 if let Some(array_path) = array_path {
-                    return Err(TemplateParsingError::MultipleRepeatString(
+                    return Err(InjectableParsingError::MultipleRepeatString(
                         current_path.clone(),
                         array_path.path_to_array.clone(),
                     ));
                 }
                 if first == repeat_string {
-                    return Err(TemplateParsingError::BadIndexForRepeatString(
+                    return Err(InjectableParsingError::BadIndexForRepeatString(
                         current_path.clone(),
                         0,
                     ));
                 }
                 if let Some(position) = rest.iter().position(|value| value == repeat_string) {
                     let position = position + 2;
-                    return Err(TemplateParsingError::BadIndexForRepeatString(
+                    return Err(InjectableParsingError::BadIndexForRepeatString(
                         current_path.clone(),
                         position,
                     ));
@@ -609,7 +564,9 @@ impl ValueTemplate {
                     value_path.ok_or_else(|| {
                         let mut repeated_value_path = current_path.clone();
                         repeated_value_path.push(PathComponent::ArrayIndex(0));
-                        TemplateParsingError::MissingPlaceholderInRepeatedValue(repeated_value_path)
+                        InjectableParsingError::MissingPlaceholderInRepeatedValue(
+                            repeated_value_path,
+                        )
                     })?
                 };
                 **array_path = Some(ArrayPath {
@@ -621,7 +578,7 @@ impl ValueTemplate {
             // 2. array does not contain a repeat string
             array => {
                 if let Some(position) = array.iter().position(|value| value == repeat_string) {
-                    return Err(TemplateParsingError::BadIndexForRepeatString(
+                    return Err(InjectableParsingError::BadIndexForRepeatString(
                         current_path.clone(),
                         position,
                     ));
@@ -650,7 +607,7 @@ impl ValueTemplate {
         value_path: &mut Option<ValuePath>,
         array_path: &mut ArrayParsingContext,
         current_path: &mut ValuePath,
-    ) -> Result<(), TemplateParsingError> {
+    ) -> Result<(), InjectableParsingError> {
         for (key, value) in object.iter() {
             current_path.push(PathComponent::MapKey(key.to_owned()));
             Self::parse_value(
@@ -673,12 +630,12 @@ impl ValueTemplate {
         value_path: &mut Option<ValuePath>,
         array_path: &mut ArrayParsingContext,
         current_path: &mut ValuePath,
-    ) -> Result<(), TemplateParsingError> {
+    ) -> Result<(), InjectableParsingError> {
         match value {
             Value::String(str) => {
                 if placeholder_string == str {
                     if let Some(value_path) = value_path {
-                        return Err(TemplateParsingError::MultiplePlaceholderString(
+                        return Err(InjectableParsingError::MultiplePlaceholderString(
                             current_path.clone(),
                             value_path.clone(),
                         ));
@@ -687,7 +644,9 @@ impl ValueTemplate {
                     *value_path = Some(current_path.clone());
                 }
                 if repeat_string == str {
-                    return Err(TemplateParsingError::RepeatStringNotInArray(current_path.clone()));
+                    return Err(InjectableParsingError::RepeatStringNotInArray(
+                        current_path.clone(),
+                    ));
                 }
             }
             Value::Null | Value::Bool(_) | Value::Number(_) => {}
@@ -712,27 +671,6 @@ impl ValueTemplate {
     }
 }

-fn inject_value(rendered: &mut Value, injection_path: &Vec<PathComponent>, injected_value: Value) {
-    let mut current_value = rendered;
-    for injection_component in injection_path {
-        current_value = match injection_component {
-            PathComponent::MapKey(key) => current_value.get_mut(key).unwrap(),
-            PathComponent::ArrayIndex(index) => current_value.get_mut(index).unwrap(),
-        }
-    }
-    *current_value = injected_value;
-}
-
-fn format_value(value: &Value) -> String {
-    match value {
-        Value::Array(array) => format!("an array of size {}", array.len()),
-        Value::Object(object) => {
-            format!("an object with {} field(s)", object.len())
-        }
-        value => value.to_string(),
-    }
-}
-
 fn extract_value(
     extraction_path: &[PathComponent],
     initial_value: &mut Value,
@@ -838,10 +776,10 @@ impl<T> ExtractionResultErrorContext<T> for Result<T, ExtractionError> {
 mod test {
     use serde_json::{json, Value};

-    use super::{PathComponent, TemplateParsingError, ValueTemplate};
+    use super::{InjectableParsingError, InjectableValue, PathComponent};

-    fn new_template(template: Value) -> Result<ValueTemplate, TemplateParsingError> {
-        ValueTemplate::new(template, "{{text}}", "{{..}}")
+    fn new_template(template: Value) -> Result<InjectableValue, InjectableParsingError> {
+        InjectableValue::new(template, "{{text}}", "{{..}}")
     }

     #[test]
@@ -853,7 +791,7 @@ mod test {
         });

         let error = new_template(template.clone()).unwrap_err();
-        assert!(matches!(error, TemplateParsingError::MissingPlaceholderString))
+        assert!(matches!(error, InjectableParsingError::MissingPlaceholderString))
     }

     #[test]
@@ -887,7 +825,7 @@ mod test {
         });

         match new_template(template.clone()) {
-            Err(TemplateParsingError::MultiplePlaceholderString(left, right)) => {
+            Err(InjectableParsingError::MultiplePlaceholderString(left, right)) => {
                 assert_eq!(
                     left,
                     vec![PathComponent::MapKey("titi".into()), PathComponent::ArrayIndex(3)]
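
As the tests above show, an `InjectableValue` is built around two markers: a placeholder string (`"{{text}}"`) and a repeat string (`"{{..}}"`) that, placed in second position of an array, means "repeat the previous entry for every injected value". As an illustrative sketch only — not the crate API, just a plain-`serde_json` restatement of the intended expansion:

```rust
// Standalone sketch of the {{text}} / {{..}} expansion semantics.
// The hypothetical `expand` below stands in for InjectableValue::inject.
use serde_json::{json, Value};

fn expand(texts: &[&str]) -> Value {
    // Conceptual template: {"input": ["{{text}}", "{{..}}"]} — the first slot
    // is the placeholder, "{{..}}" asks to repeat it for every injected text.
    json!({ "input": texts })
}

fn main() {
    // Injecting ["hello", "world"] yields one array entry per text.
    assert_eq!(expand(&["hello", "world"]), json!({ "input": ["hello", "world"] }));
}
```
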
diff --git a/crates/milli/src/vector/json_template/mod.rs b/crates/milli/src/vector/json_template/mod.rs
new file mode 100644
index 000000000..d7ce3e8f1
--- /dev/null
+++ b/crates/milli/src/vector/json_template/mod.rs
@@ -0,0 +1,282 @@
+//! Exposes types to manipulate JSON values
+//!
+//! - [`JsonTemplate`]: renders JSON values by rendering its strings as [`Template`]s.
+//! - [`InjectableValue`]: Describes a JSON value containing placeholders,
+//!   then allows injecting values in place of the placeholders to produce new concrete JSON values,
+//!   or extracting sub-values at the placeholder location from concrete JSON values.
+//!
+//! The module also exposes foundational types to work with JSON paths:
+//!
+//! - [`ValuePath`] is made of [`PathComponent`]s to indicate the location of a sub-value inside of a JSON value.
+//! - [`inject_value`] is a primitive that replaces the sub-value at the described location by an injected value.
+
+#![warn(rustdoc::broken_intra_doc_links)]
+#![warn(missing_docs)]
+
+use bumpalo::Bump;
+use liquid::{Parser, Template};
+use serde_json::{Map, Value};
+
+use crate::prompt::ParseableDocument;
+use crate::update::new::document::Document;
+
+mod injectable_value;
+
+pub use injectable_value::InjectableValue;
+
+/// Represents a JSON [`Value`] where each string is rendered as a [`Template`].
+#[derive(Debug)]
+pub struct JsonTemplate {
+    value: Value,
+    templates: Vec<TemplateAtPath>,
+}
+
+impl Clone for JsonTemplate {
+    fn clone(&self) -> Self {
+        Self::new(self.value.clone()).unwrap()
+    }
+}
+
+struct TemplateAtPath {
+    template: Template,
+    path: ValuePath,
+}
+
+impl std::fmt::Debug for TemplateAtPath {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("TemplateAtPath")
+            .field("template", &&"template")
+            .field("path", &self.path)
+            .finish()
+    }
+}
+
+/// Error that can occur either when parsing the templates in the value, or when trying to render them.
+#[derive(Debug)]
+pub struct Error {
+    template_error: liquid::Error,
+    path: ValuePath,
+}
+
+impl Error {
+    /// Produces an error message when the error happened at rendering time.
+    pub fn rendering_error(&self, root: &str) -> String {
+        format!(
+            "in `{}`, error while rendering template: {}",
+            path_with_root(root, self.path.iter()),
+            &self.template_error
+        )
+    }
+
+    /// Produces an error message when the error happened at parsing time.
+    pub fn parsing(&self, root: &str) -> String {
+        format!(
+            "in `{}`, error while parsing template: {}",
+            path_with_root(root, self.path.iter()),
+            &self.template_error
+        )
+    }
+}
+
+impl JsonTemplate {
+    /// Creates a new `JsonTemplate` by parsing all strings inside the value as templates.
+    ///
+    /// # Error
+    ///
+    /// - If any of the strings contains a template that cannot be parsed.
+    pub fn new(value: Value) -> Result<Self, Error> {
+        let templates = build_templates(&value)?;
+        Ok(Self { value, templates })
+    }
+
+    /// Renders this value by replacing all its strings with the rendered version of the template they represent from the given context.
+    ///
+    /// # Error
+    ///
+    /// - If any of the strings contains a template that cannot be rendered with the given context.
+    pub fn render(&self, context: &dyn liquid::ObjectView) -> Result<Value, Error> {
+        let mut rendered = self.value.clone();
+        for TemplateAtPath { template, path } in &self.templates {
+            let injected_value =
+                template.render(context).map_err(|err| error_with_path(err, path.clone()))?;
+            inject_value(&mut rendered, path, Value::String(injected_value));
+        }
+        Ok(rendered)
+    }
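
Each string of the value is a Liquid template, so fragments like `{"q": "{{ q }}"}` are rendered string by string. A minimal sketch of that mechanism, using the `liquid` crate directly (illustrative only; it assumes a Cargo dependency on `liquid` and mirrors what `build_templates` + `render` do for one string):

```rust
// One string of a JsonTemplate, parsed and rendered as a Liquid template.
use liquid::ParserBuilder;

fn main() -> Result<(), liquid::Error> {
    let compiler = ParserBuilder::with_stdlib().build()?;
    // Corresponds to the string value found at some path of the JSON template.
    let template = compiler.parse("{{ q }}")?;
    // `render_search` below builds a context like this from the query/media.
    let context = liquid::object!({ "q": "shoes" });
    assert_eq!(template.render(&context)?, "shoes");
    Ok(())
}
```
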
+
+    /// Renders this value by replacing all its strings with the rendered version of the template they represent from the contents of the given document.
+    ///
+    /// # Error
+    ///
+    /// - If any of the strings contains a template that cannot be rendered with the given document.
+    pub fn render_document<'a, 'doc, D: Document<'a> + std::fmt::Debug>(
+        &self,
+        document: D,
+        doc_alloc: &'doc Bump,
+    ) -> Result<Value, Error> {
+        let document = ParseableDocument::new(document, doc_alloc);
+        let context = crate::prompt::Context::without_fields(&document);
+        self.render(&context)
+    }
+
+    /// Renders this value by replacing all its strings with the rendered version of the template they represent from the contents of the search query.
+    ///
+    /// # Error
+    ///
+    /// - If any of the strings contains a template that cannot be rendered from the contents of the search query
+    pub fn render_search(&self, q: Option<&str>, media: Option<&Value>) -> Result<Value, Error> {
+        let search_data = match (q, media) {
+            (None, None) => liquid::object!({}),
+            (None, Some(media)) => liquid::object!({ "media": media }),
+            (Some(q), None) => liquid::object!({ "q": q }),
+            (Some(q), Some(media)) => liquid::object!({ "q": q, "media": media }),
+        };
+        self.render(&search_data)
+    }
+
+    /// The JSON value representing the underlying template
+    pub fn template(&self) -> &Value {
+        &self.value
+    }
+}
+
+fn build_templates(value: &Value) -> Result<Vec<TemplateAtPath>, Error> {
+    let mut current_path = ValuePath::new();
+    let mut templates = Vec::new();
+    let compiler = liquid::ParserBuilder::with_stdlib().build().unwrap();
+    parse_value(value, &mut current_path, &mut templates, &compiler)?;
+    Ok(templates)
+}
+
+fn error_with_path(template_error: liquid::Error, path: ValuePath) -> Error {
+    Error { template_error, path }
+}
+
+fn parse_value(
+    value: &Value,
+    current_path: &mut ValuePath,
+    templates: &mut Vec<TemplateAtPath>,
+    compiler: &Parser,
+) -> Result<(), Error> {
+    match value {
+        Value::String(template) => {
+            let template = compiler
+                .parse(template)
+                .map_err(|err| error_with_path(err, current_path.clone()))?;
+            templates.push(TemplateAtPath { template, path: current_path.clone() });
+        }
+        Value::Array(values) => {
+            parse_array(values, current_path, templates, compiler)?;
+        }
+        Value::Object(map) => {
+            parse_object(map, current_path, templates, compiler)?;
+        }
+        _ => {}
+    }
+    Ok(())
+}
+
+fn parse_object(
+    map: &Map<String, Value>,
+    current_path: &mut ValuePath,
+    templates: &mut Vec<TemplateAtPath>,
+    compiler: &Parser,
+) -> Result<(), Error> {
+    for (key, value) in map {
+        current_path.push(PathComponent::MapKey(key.clone()));
+        parse_value(value, current_path, templates, compiler)?;
+        current_path.pop();
+    }
+    Ok(())
+}
+
+fn parse_array(
+    values: &[Value],
+    current_path: &mut ValuePath,
+    templates: &mut Vec<TemplateAtPath>,
+    compiler: &Parser,
+) -> Result<(), Error> {
+    for (index, value) in values.iter().enumerate() {
+        current_path.push(PathComponent::ArrayIndex(index));
+        parse_value(value, current_path, templates, compiler)?;
+        current_path.pop();
+    }
+    Ok(())
+}
+
+/// A list of [`PathComponent`]s describing a path to a value inside a JSON value.
+///
+/// The empty list refers to the root value.
+pub type ValuePath = Vec<PathComponent>;
+
+/// Component of a path to a Value
+#[derive(Debug, Clone)]
+pub enum PathComponent {
+    /// A key inside of an object
+    MapKey(String),
+    /// An index inside of an array
+    ArrayIndex(usize),
+}
+
+impl PartialEq for PathComponent {
+    fn eq(&self, other: &Self) -> bool {
+        match (self, other) {
+            (Self::MapKey(l0), Self::MapKey(r0)) => l0 == r0,
+            (Self::ArrayIndex(l0), Self::ArrayIndex(r0)) => l0 == r0,
+            _ => false,
+        }
+    }
+}
+
+impl Eq for PathComponent {}
+
+/// Builds a string representation of a path, prepending the name of the root value.
+pub fn path_with_root<'a>(
+    root: &str,
+    path: impl IntoIterator<Item = &'a PathComponent> + 'a,
+) -> String {
+    use std::fmt::Write as _;
+    let mut res = format!("`{root}");
+    for component in path.into_iter() {
+        match component {
+            PathComponent::MapKey(key) => {
+                let _ = write!(&mut res, ".{key}");
+            }
+            PathComponent::ArrayIndex(index) => {
+                let _ = write!(&mut res, "[{index}]");
+            }
+        }
+    }
+    res.push('`');
+    res
+}
+
+/// Modifies `rendered` to replace the sub-value at the `injection_path` location by the `injected_value`.
+///
+/// # Panics
+///
+/// - if the provided `injection_path` cannot be traversed in `rendered`.
+pub fn inject_value(
+    rendered: &mut Value,
+    injection_path: &Vec<PathComponent>,
+    injected_value: Value,
+) {
+    let mut current_value = rendered;
+    for injection_component in injection_path {
+        current_value = match injection_component {
+            PathComponent::MapKey(key) => current_value.get_mut(key).unwrap(),
+            PathComponent::ArrayIndex(index) => current_value.get_mut(index).unwrap(),
+        }
+    }
+    *current_value = injected_value;
+}
+
+fn format_value(value: &Value) -> String {
+    match value {
+        Value::Array(array) => format!("an array of size {}", array.len()),
+        Value::Object(object) => {
+            format!("an object with {} field(s)", object.len())
+        }
+        value => value.to_string(),
+    }
+}
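
To make the path machinery concrete, here is a standalone sketch (not part of the patch; it inlines the same traversal as `inject_value` above) showing how a `ValuePath` such as `.input[0]` — the notation `path_with_root` produces, e.g. `` `request.input[0]` `` — addresses and overwrites a sub-value:

```rust
// Standalone copy of the inject_value traversal over a `.input[0]` path.
use serde_json::json;

#[derive(Debug, Clone)]
enum PathComponent {
    MapKey(String),
    ArrayIndex(usize),
}

fn main() {
    // Path to the first element of the "input" array: `.input[0]`.
    let path = vec![PathComponent::MapKey("input".into()), PathComponent::ArrayIndex(0)];

    // Walk the path and replace the sub-value in place, like inject_value does.
    let mut rendered = json!({ "input": [null, "keep me"] });
    let mut current = &mut rendered;
    for component in &path {
        current = match component {
            PathComponent::MapKey(key) => current.get_mut(key).unwrap(),
            PathComponent::ArrayIndex(index) => current.get_mut(index).unwrap(),
        };
    }
    *current = json!("hello");
    assert_eq!(rendered, json!({ "input": ["hello", "keep me"] }));
}
```
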
diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs
index c2978f5db..f64223e41 100644
--- a/crates/milli/src/vector/mod.rs
+++ b/crates/milli/src/vector/mod.rs
@@ -13,17 +13,22 @@ use serde::{Deserialize, Serialize};
 use utoipa::ToSchema;

 use self::error::{EmbedError, NewEmbedderError};
-use crate::progress::Progress;
+use crate::progress::{EmbedderStats, Progress};
 use crate::prompt::{Prompt, PromptData};
+use crate::vector::composite::SubEmbedderOptions;
+use crate::vector::json_template::JsonTemplate;
 use crate::ThreadPoolNoAbort;

 pub mod composite;
+pub mod db;
 pub mod error;
+pub mod extractor;
 pub mod hf;
 pub mod json_template;
 pub mod manual;
 pub mod openai;
 pub mod parsed_vectors;
+pub mod session;
 pub mod settings;

 pub mod ollama;
@@ -60,7 +65,7 @@ impl ArroyWrapper {
         rtxn: &'a RoTxn<'a>,
         db: arroy::Database<D>,
     ) -> impl Iterator<Item = Result<arroy::Reader<'a, D>, arroy::Error>> + 'a {
-        arroy_db_range_for_embedder(self.embedder_index).map_while(move |index| {
+        arroy_store_range_for_embedder(self.embedder_index).filter_map(move |index| {
             match arroy::Reader::open(rtxn, index, db) {
                 Ok(reader) => match reader.is_empty(rtxn) {
                     Ok(false) => Some(Ok(reader)),
@@ -73,12 +78,57 @@ impl ArroyWrapper {
         })
     }

-    pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> {
-        let first_id = arroy_db_range_for_embedder(self.embedder_index).next().unwrap();
+    /// The item ids that are present in the store specified by its id.
+    ///
+    /// The ids are accessed via a lambda to avoid lifetime shenanigans.
+    pub fn items_in_store<F, O>(
+        &self,
+        rtxn: &RoTxn,
+        store_id: u8,
+        with_items: F,
+    ) -> Result<O, arroy::Error>
+    where
+        F: FnOnce(&RoaringBitmap) -> O,
+    {
         if self.quantized {
-            Ok(arroy::Reader::open(rtxn, first_id, self.quantized_db())?.dimensions())
+            self._items_in_store(rtxn, self.quantized_db(), store_id, with_items)
         } else {
-            Ok(arroy::Reader::open(rtxn, first_id, self.angular_db())?.dimensions())
+            self._items_in_store(rtxn, self.angular_db(), store_id, with_items)
+        }
+    }
+
+    fn _items_in_store<D: arroy::Distance, F, O>(
+        &self,
+        rtxn: &RoTxn,
+        db: arroy::Database<D>,
+        store_id: u8,
+        with_items: F,
+    ) -> Result<O, arroy::Error>
+    where
+        F: FnOnce(&RoaringBitmap) -> O,
+    {
+        let index = arroy_store_for_embedder(self.embedder_index, store_id);
+        let reader = arroy::Reader::open(rtxn, index, db);
+        match reader {
+            Ok(reader) => Ok(with_items(reader.item_ids())),
+            Err(arroy::Error::MissingMetadata(_)) => Ok(with_items(&RoaringBitmap::new())),
+            Err(err) => Err(err),
+        }
+    }
+
+    pub fn dimensions(&self, rtxn: &RoTxn) -> Result<Option<usize>, arroy::Error> {
+        if self.quantized {
+            Ok(self
+                .readers(rtxn, self.quantized_db())
+                .next()
+                .transpose()?
+                .map(|reader| reader.dimensions()))
+        } else {
+            Ok(self
+                .readers(rtxn, self.angular_db())
+                .next()
+                .transpose()?
+                .map(|reader| reader.dimensions()))
         }
     }

@@ -93,13 +143,13 @@ impl ArroyWrapper {
         arroy_memory: Option<usize>,
         cancel: &(impl Fn() -> bool + Sync + Send),
     ) -> Result<(), arroy::Error> {
-        for index in arroy_db_range_for_embedder(self.embedder_index) {
+        for index in arroy_store_range_for_embedder(self.embedder_index) {
             if self.quantized {
                 let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
                 if writer.need_build(wtxn)? {
                     writer.builder(rng).build(wtxn)?
                 } else if writer.is_empty(wtxn)? {
-                    break;
+                    continue;
                 }
             } else {
                 let writer = arroy::Writer::new(self.angular_db(), index, dimension);
@@ -124,7 +174,7 @@ impl ArroyWrapper {
                     .cancel(cancel)
                     .build(wtxn)?;
                 } else if writer.is_empty(wtxn)? {
-                    break;
+                    continue;
                 }
             }
         }
@@ -143,7 +193,7 @@ impl ArroyWrapper {
     ) -> Result<(), arroy::Error> {
         let dimension = embeddings.dimension();
         for (index, vector) in
-            arroy_db_range_for_embedder(self.embedder_index).zip(embeddings.iter())
+            arroy_store_range_for_embedder(self.embedder_index).zip(embeddings.iter())
         {
             if self.quantized {
                 arroy::Writer::new(self.quantized_db(), index, dimension)
@@ -179,7 +229,7 @@ impl ArroyWrapper {
     ) -> Result<(), arroy::Error> {
         let dimension = vector.len();

-        for index in arroy_db_range_for_embedder(self.embedder_index) {
+        for index in arroy_store_range_for_embedder(self.embedder_index) {
             let writer = arroy::Writer::new(db, index, dimension);
             if !writer.contains_item(wtxn, item_id)? {
                 writer.add_item(wtxn, item_id, vector)?;
@@ -189,6 +239,38 @@ impl ArroyWrapper {
         Ok(())
     }

+    /// Add a vector associated with a document in store specified by its id.
+    ///
+    /// Any existing vector associated with the document in the store will be replaced by the new vector.
+    pub fn add_item_in_store(
+        &self,
+        wtxn: &mut RwTxn,
+        item_id: arroy::ItemId,
+        store_id: u8,
+        vector: &[f32],
+    ) -> Result<(), arroy::Error> {
+        if self.quantized {
+            self._add_item_in_store(wtxn, self.quantized_db(), item_id, store_id, vector)
+        } else {
+            self._add_item_in_store(wtxn, self.angular_db(), item_id, store_id, vector)
+        }
+    }
+
+    fn _add_item_in_store<D: arroy::Distance>(
+        &self,
+        wtxn: &mut RwTxn,
+        db: arroy::Database<D>,
+        item_id: arroy::ItemId,
+        store_id: u8,
+        vector: &[f32],
+    ) -> Result<(), arroy::Error> {
+        let dimension = vector.len();
+
+        let index = arroy_store_for_embedder(self.embedder_index, store_id);
+        let writer = arroy::Writer::new(db, index, dimension);
+        writer.add_item(wtxn, item_id, vector)
+    }
+
     /// Delete all embeddings from a specific `item_id`
     pub fn del_items(
         &self,
@@ -196,24 +278,84 @@ impl ArroyWrapper {
         dimension: usize,
         item_id: arroy::ItemId,
     ) -> Result<(), arroy::Error> {
-        for index in arroy_db_range_for_embedder(self.embedder_index) {
+        for index in arroy_store_range_for_embedder(self.embedder_index) {
             if self.quantized {
                 let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
-                if !writer.del_item(wtxn, item_id)? {
-                    break;
-                }
+                writer.del_item(wtxn, item_id)?;
             } else {
                 let writer = arroy::Writer::new(self.angular_db(), index, dimension);
-                if !writer.del_item(wtxn, item_id)? {
-                    break;
-                }
+                writer.del_item(wtxn, item_id)?;
             }
         }

         Ok(())
     }

-    /// Delete one item.
+    /// Removes the item specified by its id from the store specified by its id.
+    ///
+    /// Returns whether the item was removed.
+    ///
+    /// # Warning
+    ///
+    /// - This function will silently fail to remove the item if used against an arroy database that was never built.
+    pub fn del_item_in_store(
+        &self,
+        wtxn: &mut RwTxn,
+        item_id: arroy::ItemId,
+        store_id: u8,
+        dimensions: usize,
+    ) -> Result<bool, arroy::Error> {
+        if self.quantized {
+            self._del_item_in_store(wtxn, self.quantized_db(), item_id, store_id, dimensions)
+        } else {
+            self._del_item_in_store(wtxn, self.angular_db(), item_id, store_id, dimensions)
+        }
+    }
+
+    fn _del_item_in_store<D: arroy::Distance>(
+        &self,
+        wtxn: &mut RwTxn,
+        db: arroy::Database<D>,
+        item_id: arroy::ItemId,
+        store_id: u8,
+        dimensions: usize,
+    ) -> Result<bool, arroy::Error> {
+        let index = arroy_store_for_embedder(self.embedder_index, store_id);
+        let writer = arroy::Writer::new(db, index, dimensions);
+        writer.del_item(wtxn, item_id)
+    }
+
+    /// Removes all items from the store specified by its id.
+    ///
+    /// # Warning
+    ///
+    /// - This function will silently fail to remove the items if used against an arroy database that was never built.
+    pub fn clear_store(
+        &self,
+        wtxn: &mut RwTxn,
+        store_id: u8,
+        dimensions: usize,
+    ) -> Result<(), arroy::Error> {
+        if self.quantized {
+            self._clear_store(wtxn, self.quantized_db(), store_id, dimensions)
+        } else {
+            self._clear_store(wtxn, self.angular_db(), store_id, dimensions)
+        }
+    }
+
+    fn _clear_store<D: arroy::Distance>(
+        &self,
+        wtxn: &mut RwTxn,
+        db: arroy::Database<D>,
+        store_id: u8,
+        dimensions: usize,
+    ) -> Result<(), arroy::Error> {
+        let index = arroy_store_for_embedder(self.embedder_index, store_id);
+        let writer = arroy::Writer::new(db, index, dimensions);
+        writer.clear(wtxn)
+    }
+
+    /// Delete one item from its value.
     pub fn del_item(
         &self,
         wtxn: &mut RwTxn,
@@ -235,54 +377,31 @@ impl ArroyWrapper {
         vector: &[f32],
     ) -> Result<bool, arroy::Error> {
         let dimension = vector.len();
-        let mut deleted_index = None;

-        for index in arroy_db_range_for_embedder(self.embedder_index) {
+        for index in arroy_store_range_for_embedder(self.embedder_index) {
             let writer = arroy::Writer::new(db, index, dimension);
             let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
-                // uses invariant: vectors are packed in the first writers.
-                break;
+                continue;
             };
             if candidate == vector {
-                writer.del_item(wtxn, item_id)?;
-                deleted_index = Some(index);
+                return writer.del_item(wtxn, item_id);
             }
         }
-
-        // 🥲 enforce invariant: vectors are packed in the first writers.
-        if let Some(deleted_index) = deleted_index {
-            let mut last_index_with_a_vector = None;
-            for index in
-                arroy_db_range_for_embedder(self.embedder_index).skip(deleted_index as usize)
-            {
-                let writer = arroy::Writer::new(db, index, dimension);
-                let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
-                    break;
-                };
-                last_index_with_a_vector = Some((index, candidate));
-            }
-            if let Some((last_index, vector)) = last_index_with_a_vector {
-                let writer = arroy::Writer::new(db, last_index, dimension);
-                writer.del_item(wtxn, item_id)?;
-                let writer = arroy::Writer::new(db, deleted_index, dimension);
-                writer.add_item(wtxn, item_id, &vector)?;
-            }
-        }
-        Ok(deleted_index.is_some())
+        Ok(false)
     }

     pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
-        for index in arroy_db_range_for_embedder(self.embedder_index) {
+        for index in arroy_store_range_for_embedder(self.embedder_index) {
             if self.quantized {
                 let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
                 if writer.is_empty(wtxn)? {
-                    break;
+                    continue;
                 }
                 writer.clear(wtxn)?;
             } else {
                 let writer = arroy::Writer::new(self.angular_db(), index, dimension);
                 if writer.is_empty(wtxn)? {
-                    break;
+                    continue;
                 }
                 writer.clear(wtxn)?;
             }
@@ -296,17 +415,17 @@ impl ArroyWrapper {
         dimension: usize,
         item: arroy::ItemId,
     ) -> Result<bool, arroy::Error> {
-        for index in arroy_db_range_for_embedder(self.embedder_index) {
+        for index in arroy_store_range_for_embedder(self.embedder_index) {
             let contains = if self.quantized {
                 let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
                 if writer.is_empty(rtxn)? {
-                    break;
+                    continue;
                 }
                 writer.contains_item(rtxn, item)?
             } else {
                 let writer = arroy::Writer::new(self.angular_db(), index, dimension);
                 if writer.is_empty(rtxn)? {
-                    break;
+                    continue;
                 }
                 writer.contains_item(rtxn, item)?
             };
@@ -345,13 +464,14 @@ impl ArroyWrapper {
             let reader = reader?;
             let mut searcher = reader.nns(limit);
             if let Some(filter) = filter {
+                if reader.item_ids().is_disjoint(filter) {
+                    continue;
+                }
                 searcher.candidates(filter);
             }

             if let Some(mut ret) = searcher.by_item(rtxn, item)? {
                 results.append(&mut ret);
-            } else {
-                break;
             }
         }
         results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
@@ -386,6 +506,9 @@ impl ArroyWrapper {
             let reader = reader?;
             let mut searcher = reader.nns(limit);
             if let Some(filter) = filter {
+                if reader.item_ids().is_disjoint(filter) {
+                    continue;
+                }
                 searcher.candidates(filter);
             }

@@ -404,16 +527,12 @@ impl ArroyWrapper {
         for reader in self.readers(rtxn, self.quantized_db()) {
             if let Some(vec) = reader?.item_vector(rtxn, item_id)? {
                 vectors.push(vec);
-            } else {
-                break;
             }
         }
     } else {
         for reader in self.readers(rtxn, self.angular_db()) {
             if let Some(vec) = reader?.item_vector(rtxn, item_id)? {
                 vectors.push(vec);
-            } else {
-                break;
             }
         }
     }
@@ -465,6 +584,7 @@ pub struct ArroyStats {
     pub documents: RoaringBitmap,
 }
 /// One or multiple embeddings stored consecutively in a flat vector.
+#[derive(Debug, PartialEq)]
 pub struct Embeddings<F> {
     data: Vec<F>,
     dimension: usize,
@@ -615,15 +735,43 @@ impl EmbeddingConfig {
     }
 }

-/// Map of embedder configurations.
-///
-/// Each configuration is mapped to a name.
+/// Map of runtime embedder data.
 #[derive(Clone, Default)]
-pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>);
+pub struct RuntimeEmbedders(HashMap<String, Arc<RuntimeEmbedder>>);

-impl EmbeddingConfigs {
+pub struct RuntimeEmbedder {
+    pub embedder: Arc<Embedder>,
+    pub document_template: Prompt,
+    fragments: Vec<RuntimeFragment>,
+    pub is_quantized: bool,
+}
+
+impl RuntimeEmbedder {
+    pub fn new(
+        embedder: Arc<Embedder>,
+        document_template: Prompt,
+        mut fragments: Vec<RuntimeFragment>,
+        is_quantized: bool,
+    ) -> Self {
+        fragments.sort_unstable_by(|left, right| left.name.cmp(&right.name));
+        Self { embedder, document_template, fragments, is_quantized }
+    }
+
+    /// The runtime fragments sorted by name.
+    pub fn fragments(&self) -> &[RuntimeFragment] {
+        self.fragments.as_slice()
+    }
+}
+
+pub struct RuntimeFragment {
+    pub name: String,
+    pub id: u8,
+    pub template: JsonTemplate,
+}
+
+impl RuntimeEmbedders {
     /// Create the map from its internal components.
-    pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>) -> Self {
+    pub fn new(data: HashMap<String, Arc<RuntimeEmbedder>>) -> Self {
         Self(data)
     }

@@ -632,24 +780,31 @@ impl EmbeddingConfigs {
     }

     /// Get an embedder configuration and template from its name.
-    pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>, bool)> {
-        self.0.get(name).cloned()
+    pub fn get(&self, name: &str) -> Option<&Arc<RuntimeEmbedder>> {
+        self.0.get(name)
     }

-    pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
+    pub fn inner_as_ref(&self) -> &HashMap<String, Arc<RuntimeEmbedder>> {
         &self.0
     }

-    pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
+    pub fn into_inner(self) -> HashMap<String, Arc<RuntimeEmbedder>> {
         self.0
     }
+
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
 }

-impl IntoIterator for EmbeddingConfigs {
-    type Item = (String, (Arc<Embedder>, Arc<Prompt>, bool));
+impl IntoIterator for RuntimeEmbedders {
+    type Item = (String, Arc<RuntimeEmbedder>);

-    type IntoIter =
-        std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>, bool)>;
+    type IntoIter = std::collections::hash_map::IntoIter<String, Arc<RuntimeEmbedder>>;

     fn into_iter(self) -> Self::IntoIter {
         self.0.into_iter()
@@ -667,6 +822,27 @@ pub enum EmbedderOptions {
     Composite(composite::EmbedderOptions),
 }

+impl EmbedderOptions {
+    pub fn fragment(&self, name: &str) -> Option<&serde_json::Value> {
+        match &self {
+            EmbedderOptions::HuggingFace(_)
+            | EmbedderOptions::OpenAi(_)
+            | EmbedderOptions::Ollama(_)
+            | EmbedderOptions::UserProvided(_) => None,
+            EmbedderOptions::Rest(embedder_options) => {
+                embedder_options.indexing_fragments.get(name)
+            }
+            EmbedderOptions::Composite(embedder_options) => {
+                if let SubEmbedderOptions::Rest(embedder_options) = &embedder_options.index {
+                    embedder_options.indexing_fragments.get(name)
+                } else {
+                    None
+                }
+            }
+        }
+    }
+}
+
 impl Default for EmbedderOptions {
     fn default() -> Self {
         Self::HuggingFace(Default::default())
@@ -707,6 +883,17 @@ impl Embedder {

     #[tracing::instrument(level = "debug", skip_all, target = "search")]
     pub fn embed_search(
+        &self,
+        query: SearchQuery<'_>,
+        deadline: Option<Instant>,
+    ) -> std::result::Result<Embedding, EmbedError> {
+        match query {
+            SearchQuery::Text(text) => self.embed_search_text(text, deadline),
+            SearchQuery::Media { q, media } => self.embed_search_media(q, media, deadline),
+        }
+    }
+
+    pub fn embed_search_text(
         &self,
         text: &str,
         deadline: Option<Instant>,
@@ -719,18 +906,17 @@ impl Embedder {
         }
         let embedding = match self {
             Embedder::HuggingFace(embedder) => embedder.embed_one(text),
-            Embedder::OpenAi(embedder) => {
-                embedder.embed(&[text], deadline)?.pop().ok_or_else(EmbedError::missing_embedding)
-            }
-            Embedder::Ollama(embedder) => {
-                embedder.embed(&[text], deadline)?.pop().ok_or_else(EmbedError::missing_embedding)
-            }
-            Embedder::UserProvided(embedder) => embedder.embed_one(text),
-            Embedder::Rest(embedder) => embedder
-                .embed_ref(&[text], deadline)?
+            Embedder::OpenAi(embedder) => embedder
+                .embed(&[text], deadline, None)?
                 .pop()
                 .ok_or_else(EmbedError::missing_embedding),
-            Embedder::Composite(embedder) => embedder.search.embed_one(text, deadline),
+            Embedder::Ollama(embedder) => embedder
+                .embed(&[text], deadline, None)?
+                .pop()
+                .ok_or_else(EmbedError::missing_embedding),
+            Embedder::UserProvided(embedder) => embedder.embed_one(text),
+            Embedder::Rest(embedder) => embedder.embed_one(SearchQuery::Text(text), deadline, None),
+            Embedder::Composite(embedder) => embedder.search.embed_one(text, deadline, None),
         }?;

         if let Some(cache) = self.cache() {
@@ -740,6 +926,18 @@ impl Embedder {
         Ok(embedding)
     }

+    pub fn embed_search_media(
+        &self,
+        q: Option<&str>,
+        media: Option<&serde_json::Value>,
+        deadline: Option<Instant>,
+    ) -> std::result::Result<Embedding, EmbedError> {
+        let Embedder::Rest(embedder) = self else {
+            return Err(EmbedError::rest_media_not_a_rest());
+        };
+        embedder.embed_one(SearchQuery::Media { q, media }, deadline, None)
+    }
+
     /// Embed multiple chunks of texts.
     ///
     /// Each chunk is composed of one or multiple texts.
@@ -747,14 +945,21 @@ impl Embedder {
         &self,
         text_chunks: Vec<Vec<String>>,
         threads: &ThreadPoolNoAbort,
+        embedder_stats: &EmbedderStats,
     ) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
         match self {
             Embedder::HuggingFace(embedder) => embedder.embed_index(text_chunks),
-            Embedder::OpenAi(embedder) => embedder.embed_index(text_chunks, threads),
-            Embedder::Ollama(embedder) => embedder.embed_index(text_chunks, threads),
+            Embedder::OpenAi(embedder) => {
+                embedder.embed_index(text_chunks, threads, embedder_stats)
+            }
+            Embedder::Ollama(embedder) => {
+                embedder.embed_index(text_chunks, threads, embedder_stats)
+            }
             Embedder::UserProvided(embedder) => embedder.embed_index(text_chunks),
-            Embedder::Rest(embedder) => embedder.embed_index(text_chunks, threads),
-            Embedder::Composite(embedder) => embedder.index.embed_index(text_chunks, threads),
+            Embedder::Rest(embedder) => embedder.embed_index(text_chunks, threads, embedder_stats),
+            Embedder::Composite(embedder) => {
+                embedder.index.embed_index(text_chunks, threads, embedder_stats)
+            }
         }
     }

@@ -763,14 +968,37 @@ impl Embedder {
         &self,
         texts: &[&str],
         threads: &ThreadPoolNoAbort,
+        embedder_stats: &EmbedderStats,
     ) -> std::result::Result<Vec<Embedding>, EmbedError> {
         match self {
             Embedder::HuggingFace(embedder) => embedder.embed_index_ref(texts),
-            Embedder::OpenAi(embedder) => embedder.embed_index_ref(texts, threads),
-            Embedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads),
+            Embedder::OpenAi(embedder) => embedder.embed_index_ref(texts, threads, embedder_stats),
+            Embedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads, embedder_stats),
             Embedder::UserProvided(embedder) => embedder.embed_index_ref(texts),
-            Embedder::Rest(embedder) => embedder.embed_index_ref(texts, threads),
-            Embedder::Composite(embedder) => embedder.index.embed_index_ref(texts, threads),
+            Embedder::Rest(embedder) => embedder.embed_index_ref(texts, threads, embedder_stats),
+            Embedder::Composite(embedder) => {
+                embedder.index.embed_index_ref(texts, threads, embedder_stats)
+            }
+        }
+    }
+
+    pub fn embed_index_ref_fragments(
+        &self,
+        fragments: &[serde_json::Value],
+        threads: &ThreadPoolNoAbort,
+        embedder_stats: &EmbedderStats,
+    ) -> std::result::Result<Vec<Embedding>, EmbedError> {
+        if let Embedder::Rest(embedder) = self {
+            embedder.embed_index_ref(fragments, threads, embedder_stats)
+        } else {
+            let Embedder::Composite(embedder) = self else {
+                unimplemented!("embedding fragments is only available for rest embedders")
+            };
+            let crate::vector::composite::SubEmbedder::Rest(embedder) = &embedder.index else {
+                unimplemented!("embedding fragments is only available for rest embedders")
+            };
+
+            embedder.embed_index_ref(fragments, threads, embedder_stats)
         }
     }
@@ -845,6 +1073,12 @@ impl Embedder {
     }
 }

+#[derive(Clone, Copy)]
+pub enum SearchQuery<'a> {
+    Text(&'a str),
+    Media { q: Option<&'a str>, media: Option<&'a serde_json::Value> },
+}
+
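
The new `SearchQuery` enum is the fork in the road for search-time embedding: text-only queries can be served by every embedder, while anything carrying media must go through a REST embedder configured with `searchFragments`. A standalone sketch of that dispatch (illustrative only; it copies the enum shape and restates the routing of `embed_search` with a hypothetical `describe` helper):

```rust
use serde_json::{json, Value};

#[derive(Clone, Copy)]
enum SearchQuery<'a> {
    Text(&'a str),
    Media { q: Option<&'a str>, media: Option<&'a Value> },
}

fn describe(query: SearchQuery) -> &'static str {
    match query {
        // Routed to embed_search_text: any embedder kind can serve it.
        SearchQuery::Text(_) => "text",
        // Routed to embed_search_media: REST embedders with fragments only.
        SearchQuery::Media { .. } => "media",
    }
}

fn main() {
    let media = json!({ "image": "…" });
    assert_eq!(describe(SearchQuery::Text("shoes")), "text");
    assert_eq!(describe(SearchQuery::Media { q: Some("shoes"), media: Some(&media) }), "media");
}
```
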
 /// Describes the mean and sigma of distribution of embedding similarity in the embedding space.
 ///
 /// The intended use is to make the similarity score more comparable to the regular ranking score.
@@ -974,8 +1208,11 @@ pub const fn is_cuda_enabled() -> bool {
     cfg!(feature = "cuda")
 }

-pub fn arroy_db_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
-    let embedder_id = (embedder_id as u16) << 8;
-
-    (0..=u8::MAX).map(move |k| embedder_id | (k as u16))
+fn arroy_store_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
+    (0..=u8::MAX).map(move |store_id| arroy_store_for_embedder(embedder_id, store_id))
+}
+
+fn arroy_store_for_embedder(embedder_id: u8, store_id: u8) -> u16 {
+    let embedder_id = (embedder_id as u16) << 8;
+    embedder_id | (store_id as u16)
 }
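
The packing above gives each embedder a contiguous block of 256 arroy indexes: the embedder id occupies the high byte of the `u16` and the store id (extractor id) the low byte. A standalone check of that arithmetic, copying the function verbatim:

```rust
// Copy of arroy_store_for_embedder, with a few sanity checks on the packing.
fn arroy_store_for_embedder(embedder_id: u8, store_id: u8) -> u16 {
    let embedder_id = (embedder_id as u16) << 8;
    embedder_id | (store_id as u16)
}

fn main() {
    assert_eq!(arroy_store_for_embedder(0, 0), 0x0000);
    // Embedder 1 owns indexes 0x0100..=0x01FF, one per store id.
    assert_eq!(arroy_store_for_embedder(1, 0), 0x0100);
    assert_eq!(arroy_store_for_embedder(1, 2), 0x0102);
    assert_eq!(arroy_store_for_embedder(1, 255), 0x01FF);
}
```

Store id 0 is what `DocumentTemplateExtractor::extractor_id` returns, so the document-template vectors land in the first store of each block, with fragment vectors in the stores matching their fragment ids.
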
diff --git a/crates/milli/src/vector/ollama.rs b/crates/milli/src/vector/ollama.rs
index 8beae6205..feec92cc0 100644
--- a/crates/milli/src/vector/ollama.rs
+++ b/crates/milli/src/vector/ollama.rs
@@ -7,6 +7,7 @@ use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErro
 use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
 use super::{DistributionShift, EmbeddingCache, REQUEST_PARALLELISM};
 use crate::error::FaultSource;
+use crate::progress::EmbedderStats;
 use crate::vector::Embedding;
 use crate::ThreadPoolNoAbort;

@@ -70,6 +71,8 @@ impl EmbedderOptions {
             request,
             response,
             headers: Default::default(),
+            indexing_fragments: Default::default(),
+            search_fragments: Default::default(),
         })
     }
 }
@@ -104,8 +107,9 @@ impl Embedder {
         &self,
         texts: &[S],
         deadline: Option<Instant>,
+        embedder_stats: Option<&EmbedderStats>,
     ) -> Result<Vec<Embedding>, EmbedError> {
-        match self.rest_embedder.embed_ref(texts, deadline) {
+        match self.rest_embedder.embed_ref(texts, deadline, embedder_stats) {
             Ok(embeddings) => Ok(embeddings),
             Err(EmbedError { kind: EmbedErrorKind::RestOtherStatusCode(404, error), fault: _ }) => {
                 Err(EmbedError::ollama_model_not_found(error))
@@ -118,15 +122,22 @@ impl Embedder {
         &self,
         text_chunks: Vec<Vec<String>>,
         threads: &ThreadPoolNoAbort,
+        embedder_stats: &EmbedderStats,
     ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
         // This condition helps reduce the number of active rayon jobs
         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
         if threads.active_operations() >= REQUEST_PARALLELISM {
-            text_chunks.into_iter().map(move |chunk| self.embed(&chunk, None)).collect()
+            text_chunks
+                .into_iter()
+                .map(move |chunk| self.embed(&chunk, None, Some(embedder_stats)))
+                .collect()
         } else {
             threads
                 .install(move || {
-                    text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
+                    text_chunks
+                        .into_par_iter()
+                        .map(move |chunk| self.embed(&chunk, None, Some(embedder_stats)))
+                        .collect()
                 })
                 .map_err(|error| EmbedError {
                     kind: EmbedErrorKind::PanicInThreadPool(error),
@@ -139,13 +150,14 @@ impl Embedder {
         &self,
         texts: &[&str],
         threads: &ThreadPoolNoAbort,
+        embedder_stats: &EmbedderStats,
     ) -> Result<Vec<Embedding>, EmbedError> {
         // This condition helps reduce the number of active rayon jobs
         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
         if threads.active_operations() >= REQUEST_PARALLELISM {
             let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
                 .chunks(self.prompt_count_in_chunk_hint())
-                .map(move |chunk| self.embed(chunk, None))
+                .map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
                 .collect();

             let embeddings = embeddings?;
@@ -155,7 +167,7 @@ impl Embedder {
                 .install(move || {
                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
                         .par_chunks(self.prompt_count_in_chunk_hint())
-                        .map(move |chunk| self.embed(chunk, None))
+                        .map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
                         .collect();

                     let embeddings = embeddings?;
diff --git a/crates/milli/src/vector/openai.rs b/crates/milli/src/vector/openai.rs
index df29f6916..bf6c92978 100644
--- a/crates/milli/src/vector/openai.rs
+++ b/crates/milli/src/vector/openai.rs
@@ -9,6 +9,7 @@ use super::error::{EmbedError, NewEmbedderError};
 use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
 use super::{DistributionShift, EmbeddingCache, REQUEST_PARALLELISM};
 use crate::error::FaultSource;
+use crate::progress::EmbedderStats;
 use crate::vector::error::EmbedErrorKind;
 use crate::vector::Embedding;
 use crate::ThreadPoolNoAbort;
@@ -200,6 +201,8 @@ impl Embedder {
                 ]
             }),
             headers: Default::default(),
+            indexing_fragments: Default::default(),
+            search_fragments: Default::default(),
         },
         cache_cap,
         super::rest::ConfigurationSource::OpenAi,
@@ -215,8 +218,9 @@ impl Embedder {
         &self,
         texts: &[S],
         deadline: Option<Instant>,
+        embedder_stats: Option<&EmbedderStats>,
     ) -> Result<Vec<Embedding>, EmbedError> {
-        match self.rest_embedder.embed_ref(texts, deadline) {
+        match self.rest_embedder.embed_ref(texts, deadline, embedder_stats) {
             Ok(embeddings) => Ok(embeddings),
             Err(EmbedError { kind: EmbedErrorKind::RestBadRequest(error, _), fault: _ }) => {
                 tracing::warn!(error=?error, "OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your document template.");
@@ -238,7 +242,11 @@ impl Embedder {
             let encoded = self.tokenizer.encode_ordinary(text);
             let len = encoded.len();
             if len < max_token_count {
-                all_embeddings.append(&mut self.rest_embedder.embed_ref(&[text], deadline)?);
+                all_embeddings.append(&mut self.rest_embedder.embed_ref(
+                    &[text],
+                    deadline,
+                    None,
+                )?);
                 continue;
             }

@@ -255,15 +263,22 @@ impl Embedder {
         &self,
         text_chunks: Vec<Vec<String>>,
         threads: &ThreadPoolNoAbort,
+        embedder_stats: &EmbedderStats,
     ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
         // This condition helps reduce the number of active rayon jobs
         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
         if threads.active_operations() >= REQUEST_PARALLELISM {
-            text_chunks.into_iter().map(move |chunk| self.embed(&chunk, None)).collect()
+            text_chunks
+                .into_iter()
+                .map(move |chunk| self.embed(&chunk, None, Some(embedder_stats)))
+                .collect()
         } else {
             threads
                 .install(move || {
-                    text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
+                    text_chunks
+                        .into_par_iter()
+                        .map(move |chunk| self.embed(&chunk, None, Some(embedder_stats)))
+                        .collect()
                 })
                 .map_err(|error| EmbedError {
                     kind: EmbedErrorKind::PanicInThreadPool(error),
@@ -276,13 +291,14 @@ impl Embedder {
         &self,
         texts: &[&str],
         threads: &ThreadPoolNoAbort,
+        embedder_stats: &EmbedderStats,
     ) -> Result<Vec<Embedding>, EmbedError> {
         // This condition helps reduce the number of active rayon jobs
         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
         if threads.active_operations() >= REQUEST_PARALLELISM {
             let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
                 .chunks(self.prompt_count_in_chunk_hint())
-                .map(move |chunk| self.embed(chunk, None))
+                .map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
                 .collect();
             let embeddings = embeddings?;
             Ok(embeddings.into_iter().flatten().collect())
@@ -291,7 +307,7 @@ impl Embedder {
                 .install(move || {
                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
                         .par_chunks(self.prompt_count_in_chunk_hint())
-                        .map(move |chunk| self.embed(chunk, None))
+                        .map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
                         .collect();

                     let embeddings = embeddings?;
diff --git a/crates/milli/src/vector/parsed_vectors.rs b/crates/milli/src/vector/parsed_vectors.rs
index 5fcb2912b..b96922bc4 100644
--- a/crates/milli/src/vector/parsed_vectors.rs
+++ b/crates/milli/src/vector/parsed_vectors.rs
@@ -6,9 +6,8 @@ use serde_json::value::RawValue;
 use serde_json::{from_slice, Value};

 use super::Embedding;
-use crate::index::IndexEmbeddingConfig;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd};
-use crate::{DocumentId, FieldId, InternalError, UserError};
+use crate::{FieldId, InternalError, UserError};

 #[derive(serde::Serialize, Debug)]
 #[serde(untagged)]
@@ -151,7 +150,8 @@ impl<'doc> serde::de::Visitor<'doc> for RawVectorsVisitor {
                     regenerate = Some(value);
                 }
                 Ok(Some("embeddings")) => {
-                    let value: &RawValue = match map.next_value() {
+                    let value: &RawValue = match map.next_value::<&RawValue>() {
+                        Ok(value) if value.get() == RawValue::NULL.get() => continue,
                         Ok(value) => value,
                         Err(error) => {
                             return Ok(Err(RawVectorsError::DeserializeEmbeddings {
@@ -374,8 +374,7 @@ pub struct ParsedVectorsDiff {

 impl ParsedVectorsDiff {
     pub fn new(
-        docid: DocumentId,
-        embedders_configs: &[IndexEmbeddingConfig],
+        regenerate_for_embedders: impl Iterator<Item = String>,
         documents_diff: &KvReader<FieldId>,
         old_vectors_fid: Option<FieldId>,
         new_vectors_fid: Option<FieldId>,
@@ -396,10 +395,8 @@ impl ParsedVectorsDiff {
             }
         }
         .flatten().map_or(BTreeMap::default(), |del| del.into_iter().map(|(name, vec)| (name, VectorState::Inline(vec))).collect());
-        for embedding_config in embedders_configs {
-            if embedding_config.user_provided.contains(docid) {
-                old.entry(embedding_config.name.to_string()).or_insert(VectorState::Manual);
-            }
+        for name in regenerate_for_embedders {
+            old.entry(name).or_insert(VectorState::Generated);
         }

         let new = 'new: {
diff --git a/crates/milli/src/vector/rest.rs b/crates/milli/src/vector/rest.rs
index b87ac9f77..7a16f1a1e 100644
--- a/crates/milli/src/vector/rest.rs
+++ b/crates/milli/src/vector/rest.rs
@@ -6,13 +6,16 @@ use rand::Rng;
 use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
 use rayon::slice::ParallelSlice as _;
 use serde::{Deserialize, Serialize};
+use serde_json::Value;

 use super::error::EmbedErrorKind;
-use super::json_template::ValueTemplate;
+use super::json_template::{InjectableValue, JsonTemplate};
 use super::{
-    DistributionShift, EmbedError, Embedding, EmbeddingCache, NewEmbedderError, REQUEST_PARALLELISM,
+    DistributionShift, EmbedError, Embedding, EmbeddingCache, NewEmbedderError, SearchQuery,
+    REQUEST_PARALLELISM,
 };
 use crate::error::FaultSource;
+use crate::progress::EmbedderStats;
 use crate::ThreadPoolNoAbort;

 // retrying in case of failure
@@ -87,19 +90,61 @@ struct EmbedderData {
     bearer: Option<String>,
     headers: BTreeMap<String, String>,
     url: String,
-    request: Request,
+    request: RequestData,
     response: Response,
     configuration_source: ConfigurationSource,
 }

+#[derive(Debug)]
+pub enum RequestData {
+    Single(Request),
+    FromFragments(RequestFromFragments),
+}
+
+impl RequestData {
+    pub fn new(
+        request: Value,
+        indexing_fragments: BTreeMap<String, Value>,
+        search_fragments: BTreeMap<String, Value>,
+    ) -> Result<Self, NewEmbedderError> {
+        Ok(if indexing_fragments.is_empty() && search_fragments.is_empty() {
+            RequestData::Single(Request::new(request)?)
+        } else {
+            for (name, value) in indexing_fragments {
+                JsonTemplate::new(value).map_err(|error| {
+                    NewEmbedderError::rest_could_not_parse_template(
+                        error.parsing(&format!(".indexingFragments.{name}")),
+                    )
+                })?;
+            }
+            RequestData::FromFragments(RequestFromFragments::new(request, search_fragments)?)
+        })
+    }
+
+    fn input_type(&self) -> InputType {
+        match self {
+            RequestData::Single(request) => request.input_type(),
+            RequestData::FromFragments(request_from_fragments) => {
+                request_from_fragments.input_type()
+            }
+        }
+    }
+
+    fn has_fragments(&self) -> bool {
+        matches!(self, RequestData::FromFragments(_))
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
 pub struct EmbedderOptions {
     pub api_key: Option<String>,
     pub distribution: Option<DistributionShift>,
     pub dimensions: Option<usize>,
     pub url: String,
-    pub request: serde_json::Value,
-    pub response: serde_json::Value,
+    pub request: Value,
+    pub search_fragments: BTreeMap<String, Value>,
+    pub indexing_fragments: BTreeMap<String, Value>,
+    pub response: Value,
     pub headers: BTreeMap<String, String>,
 }

@@ -137,7 +182,12 @@ impl Embedder {
             .timeout(std::time::Duration::from_secs(30))
             .build();

-        let request = Request::new(options.request)?;
+        let request = RequestData::new(
+            options.request,
+            options.indexing_fragments,
+            options.search_fragments,
+        )?;
+
         let response = Response::new(options.response, &request)?;

         let data = EmbedderData {
@@ -168,19 +218,28 @@ impl Embedder {
         &self,
         texts: Vec<String>,
         deadline: Option<Instant>,
+        embedder_stats: Option<&EmbedderStats>,
     ) -> Result<Vec<Embedding>, EmbedError> {
-        embed(&self.data, texts.as_slice(), texts.len(), Some(self.dimensions), deadline)
+        embed(
+            &self.data,
+            texts.as_slice(),
+            texts.len(),
+            Some(self.dimensions),
+            deadline,
+            embedder_stats,
+        )
     }

     pub fn embed_ref<S>(
         &self,
         texts: &[S],
         deadline: Option<Instant>,
+        embedder_stats: Option<&EmbedderStats>,
     ) -> Result<Vec<Embedding>, EmbedError>
     where
-        S: AsRef<str> + Serialize,
+        S: Serialize,
     {
-        embed(&self.data, texts, texts.len(), Some(self.dimensions), deadline)
+        embed(&self.data, texts, texts.len(), Some(self.dimensions), deadline, embedder_stats)
     }

     pub fn embed_tokens(
@@ -188,7 +247,7 @@ impl Embedder {
         tokens: &[u32],
         deadline: Option<Instant>,
     ) -> Result<Embedding, EmbedError> {
-        let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline)?;
+        let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline, None)?;
         // unwrap: guaranteed that embeddings.len() == 1, otherwise the previous line terminated in error
         Ok(embeddings.pop().unwrap())
     }

@@ -197,15 +256,22 @@ impl Embedder {
         &self,
         text_chunks: Vec<Vec<String>>,
         threads: &ThreadPoolNoAbort,
+        embedder_stats: &EmbedderStats,
     ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
         // This condition helps reduce the number of active rayon jobs
         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
         if threads.active_operations() >= REQUEST_PARALLELISM {
-            text_chunks.into_iter().map(move |chunk| self.embed(chunk, None)).collect()
+            text_chunks
+                .into_iter()
+                .map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
+                .collect()
         } else {
             threads
                 .install(move || {
-                    text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
+                    text_chunks
+                        .into_par_iter()
+                        .map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
+                        .collect()
                 })
                 .map_err(|error| EmbedError {
                     kind: EmbedErrorKind::PanicInThreadPool(error),
@@ -214,17 +280,18 @@ impl Embedder {
         }
     }

-    pub(crate) fn embed_index_ref(
+    pub(crate) fn embed_index_ref<S: Serialize + Sync>(
         &self,
-        texts: &[&str],
+        texts: &[S],
         threads: &ThreadPoolNoAbort,
+        embedder_stats: &EmbedderStats,
     ) -> Result<Vec<Embedding>, EmbedError> {
         // This condition helps reduce the number of active rayon jobs
         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
         if threads.active_operations() >= REQUEST_PARALLELISM {
             let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
                 .chunks(self.prompt_count_in_chunk_hint())
-                .map(move |chunk| self.embed_ref(chunk, None))
+                .map(move |chunk| self.embed_ref(chunk, None, Some(embedder_stats)))
                 .collect();

             let embeddings = embeddings?;
@@ -234,7 +301,7 @@ impl Embedder {
                 .install(move || {
                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
                         .par_chunks(self.prompt_count_in_chunk_hint())
-                        .map(move |chunk| self.embed_ref(chunk, None))
+                        .map(move |chunk| self.embed_ref(chunk, None, Some(embedder_stats)))
                         .collect();

                     let embeddings = embeddings?;
@@ -269,10 +336,45 @@ impl Embedder {
     pub(super) fn cache(&self) -> &EmbeddingCache {
         &self.cache
     }
+
+    pub(crate) fn embed_one(
+        &self,
+        query: SearchQuery,
+        deadline: Option<Instant>,
+        embedder_stats: Option<&EmbedderStats>,
+    ) -> Result<Embedding, EmbedError> {
+        let mut embeddings = match (&self.data.request, query) {
+            (RequestData::Single(_), SearchQuery::Text(text)) => {
+                embed(&self.data, &[text], 1, Some(self.dimensions), deadline, embedder_stats)
+            }
+            (RequestData::Single(_), SearchQuery::Media { q: _, media: _ }) => {
+                return Err(EmbedError::rest_media_not_a_fragment())
+            }
+            (RequestData::FromFragments(request_from_fragments), SearchQuery::Text(q)) => {
+                let fragment = request_from_fragments.render_search_fragment(Some(q), None)?;
+
+                embed(&self.data, &[fragment], 1, Some(self.dimensions), deadline, embedder_stats)
+            }
+            (
+                RequestData::FromFragments(request_from_fragments),
+                SearchQuery::Media { q, media },
+            ) => {
+                let fragment = request_from_fragments.render_search_fragment(q, media)?;
+
+                embed(&self.data, &[fragment], 1, Some(self.dimensions), deadline, embedder_stats)
+            }
+        }?;
+
+        // unwrap: checked by `expected_count`
+        Ok(embeddings.pop().unwrap())
+    }
 }

 fn infer_dimensions(data: &EmbedderData) -> Result<usize, NewEmbedderError> {
-    let v = embed(data, ["test"].as_slice(), 1, None, None)
+    if data.request.has_fragments() {
+        return Err(NewEmbedderError::rest_cannot_infer_dimensions_for_fragment());
+    }
+    let v = embed(data, ["test"].as_slice(), 1, None, None, None)
        .map_err(NewEmbedderError::could_not_determine_dimension)?;
     // unwrap: guaranteed that v.len() == 1, otherwise the previous line terminated in error
     Ok(v.first().unwrap().len())
@@ -284,10 +386,18 @@ fn embed<S>(
     expected_count: usize,
     expected_count: usize,
     expected_dimension: Option<usize>,
     deadline: Option<Instant>,
+    embedder_stats: Option<&EmbedderStats>,
 ) -> Result<Vec<Embedding>, EmbedError>
 where
     S: Serialize,
 {
+    if inputs.is_empty() {
+        if expected_count != 0 {
+            return Err(EmbedError::rest_response_embedding_count(expected_count, 0));
+        }
+        return Ok(Vec::new());
+    }
+
     let request = data.client.post(&data.url);
     let request = if let Some(bearer) = &data.bearer {
         request.set("Authorization", bearer)
@@ -299,9 +409,17 @@ where
         request = request.set(header.as_str(), value.as_str());
     }

-    let body = data.request.inject_texts(inputs);
+    let body = match &data.request {
+        RequestData::Single(request) => request.inject_texts(inputs),
+        RequestData::FromFragments(request_from_fragments) => {
+            request_from_fragments.request_from_fragments(inputs).expect("inputs was empty")
+        }
+    };

     for attempt in 0..10 {
+        if let Some(embedder_stats) = &embedder_stats {
+            embedder_stats.total_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+        }
         let response = request.clone().send_json(&body);
         let result = check_response(response, data.configuration_source).and_then(|response| {
             response_to_embedding(response, data, expected_count, expected_dimension)
@@ -311,6 +429,13 @@ where
             Ok(response) => return Ok(response),
             Err(retry) => {
                 tracing::warn!("Failed: {}", retry.error);
+                if let Some(embedder_stats) = &embedder_stats {
+                    let stringified_error = retry.error.to_string();
+                    let mut errors =
+                        embedder_stats.errors.write().unwrap_or_else(|p| p.into_inner());
+                    errors.0 = Some(stringified_error);
+                    errors.1 += 1;
+                }
                 if let Some(deadline) = deadline {
                     let now = std::time::Instant::now();
                     if now > deadline {
@@ -336,12 +461,26 @@ where
         std::thread::sleep(retry_duration);
     }

+    if let Some(embedder_stats) = &embedder_stats {
+        embedder_stats.total_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+    }
     let response = request.send_json(&body);
-    let result = check_response(response, data.configuration_source);
-    result.map_err(Retry::into_error).and_then(|response| {
+    let result = check_response(response, data.configuration_source).and_then(|response| {
         response_to_embedding(response, data, expected_count, expected_dimension)
-            .map_err(Retry::into_error)
-    })
+    });
+
+    match result {
+        Ok(response) => Ok(response),
+        Err(retry) => {
+            if let Some(embedder_stats) = &embedder_stats {
+                let stringified_error = retry.error.to_string();
+                let mut errors = embedder_stats.errors.write().unwrap_or_else(|p| p.into_inner());
+                errors.0 = Some(stringified_error);
+                errors.1 += 1;
+            };
+            Err(retry.into_error())
+        }
+    }
 }

 fn check_response(
@@ -383,7 +522,7 @@ fn response_to_embedding(
     expected_count: usize,
     expected_dimensions: Option<usize>,
 ) -> Result<Vec<Embedding>, Retry> {
-    let response: serde_json::Value = response
+    let response: Value = response
         .into_json()
         .map_err(EmbedError::rest_response_deserialization)
         .map_err(Retry::retry_later)?;
@@ -412,21 +551,24 @@ fn response_to_embedding(
 }

 pub(super) const REQUEST_PLACEHOLDER: &str = "{{text}}";
+pub(super) const REQUEST_FRAGMENT_PLACEHOLDER: &str = "{{fragment}}";
 pub(super) const RESPONSE_PLACEHOLDER: &str = "{{embedding}}";
 pub(super) const REPEAT_PLACEHOLDER: &str = "{{..}}";

 #[derive(Debug)]
 pub struct Request {
-    template: ValueTemplate,
+    template: InjectableValue,
 }

 impl Request {
-    pub fn new(template: serde_json::Value) -> Result<Self, NewEmbedderError> {
-        let template = match ValueTemplate::new(template, REQUEST_PLACEHOLDER, REPEAT_PLACEHOLDER) {
+    pub fn new(template: Value) -> Result<Self, NewEmbedderError> {
+        let template = match InjectableValue::new(template, REQUEST_PLACEHOLDER, REPEAT_PLACEHOLDER)
+        {
             Ok(template) => template,
             Err(error) => {
                 let message =
                     error.error_message("request", REQUEST_PLACEHOLDER, REPEAT_PLACEHOLDER);
+                let message = format!("{message}\n - Note: this template is using a document template, and so expects to contain the placeholder {REQUEST_PLACEHOLDER:?} rather than {REQUEST_FRAGMENT_PLACEHOLDER:?}");
                 return Err(NewEmbedderError::rest_could_not_parse_template(message));
             }
         };
@@ -442,42 +584,120 @@ impl Request {
         }
     }

-    pub fn inject_texts(
-        &self,
-        texts: impl IntoIterator<Item = impl Serialize>,
-    ) -> serde_json::Value {
+    pub fn inject_texts<S: Serialize>(&self, texts: impl IntoIterator<Item = S>) -> Value {
         self.template.inject(texts.into_iter().map(|s| serde_json::json!(s))).unwrap()
     }
 }

 #[derive(Debug)]
-pub struct Response {
-    template: ValueTemplate,
+pub struct RequestFromFragments {
+    search_fragments: BTreeMap<String, JsonTemplate>,
+    request: InjectableValue,
 }

-impl Response {
-    pub fn new(template: serde_json::Value, request: &Request) -> Result<Self, NewEmbedderError> {
-        let template = match ValueTemplate::new(template, RESPONSE_PLACEHOLDER, REPEAT_PLACEHOLDER)
-        {
+impl RequestFromFragments {
+    pub fn new(
+        request: Value,
+        search_fragments: impl IntoIterator<Item = (String, Value)>,
+    ) -> Result<Self, NewEmbedderError> {
+        let request = match InjectableValue::new(
+            request,
+            REQUEST_FRAGMENT_PLACEHOLDER,
+            REPEAT_PLACEHOLDER,
+        ) {
             Ok(template) => template,
             Err(error) => {
-                let message =
-                    error.error_message("response", RESPONSE_PLACEHOLDER, REPEAT_PLACEHOLDER);
+                let message = error.error_message(
+                    "request",
+                    REQUEST_FRAGMENT_PLACEHOLDER,
+                    REPEAT_PLACEHOLDER,
+                );
+                let message = format!("{message}\n - Note: this template is using fragments, and so expects to contain the placeholder {REQUEST_FRAGMENT_PLACEHOLDER:?} rather than {REQUEST_PLACEHOLDER:?}");
+
+                return Err(NewEmbedderError::rest_could_not_parse_template(message));
             }
         };

-        match (template.has_array_value(), request.template.has_array_value()) {
+        let search_fragments: Result<_, NewEmbedderError> = search_fragments
+            .into_iter()
+            .map(|(name, value)| {
+                let json_template = JsonTemplate::new(value).map_err(|error| {
+                    NewEmbedderError::rest_could_not_parse_template(
+                        error.parsing(&format!(".searchFragments.{name}")),
+                    )
+                })?;
+                Ok((name, json_template))
+            })
+            .collect();
+
+        Ok(Self { request, search_fragments: search_fragments? })
}) + } + + fn input_type(&self) -> InputType { + if self.request.has_array_value() { + InputType::TextArray + } else { + InputType::Text + } + } + + pub fn render_search_fragment( + &self, + q: Option<&str>, + media: Option<&Value>, + ) -> Result { + let mut it = self.search_fragments.iter().filter_map(|(name, template)| { + let render = template.render_search(q, media).ok()?; + Some((name, render)) + }); + let Some((name, fragment)) = it.next() else { + return Err(EmbedError::rest_search_matches_no_fragment(q, media)); + }; + if let Some((second_name, _)) = it.next() { + return Err(EmbedError::rest_search_matches_multiple_fragments( + name, + second_name, + q, + media, + )); + } + + Ok(fragment) + } + + pub fn request_from_fragments<'a, S: Serialize + 'a>( + &self, + fragments: impl IntoIterator, + ) -> Option { + self.request.inject(fragments.into_iter().map(|fragment| serde_json::json!(fragment))).ok() + } +} + +#[derive(Debug)] +pub struct Response { + template: InjectableValue, +} + +impl Response { + pub fn new(template: Value, request: &RequestData) -> Result { + let template = + match InjectableValue::new(template, RESPONSE_PLACEHOLDER, REPEAT_PLACEHOLDER) { + Ok(template) => template, + Err(error) => { + let message = + error.error_message("response", RESPONSE_PLACEHOLDER, REPEAT_PLACEHOLDER); + return Err(NewEmbedderError::rest_could_not_parse_template(message)); + } + }; + + match (template.has_array_value(), request.input_type() == InputType::TextArray) { (true, true) | (false, false) => Ok(Self {template}), (true, false) => Err(NewEmbedderError::rest_could_not_parse_template("in `response`: `response` has multiple embeddings, but `request` has only one text to embed".to_string())), (false, true) => Err(NewEmbedderError::rest_could_not_parse_template("in `response`: `response` has a single embedding, but `request` has multiple texts to embed".to_string())), } } - pub fn extract_embeddings( - &self, - response: serde_json::Value, - ) -> Result, EmbedError> { + pub fn extract_embeddings(&self, response: Value) -> Result, EmbedError> { let extracted_values: Vec = match self.template.extract(response) { Ok(extracted_values) => extracted_values, Err(error) => { diff --git a/crates/milli/src/vector/session.rs b/crates/milli/src/vector/session.rs new file mode 100644 index 000000000..b582bd840 --- /dev/null +++ b/crates/milli/src/vector/session.rs @@ -0,0 +1,177 @@ +use bumpalo::collections::Vec as BVec; +use bumpalo::Bump; +use serde_json::Value; + +use super::{EmbedError, Embedder, Embedding}; +use crate::progress::EmbedderStats; +use crate::{DocumentId, Result, ThreadPoolNoAbort}; +type ExtractorId = u8; + +#[derive(Clone, Copy)] +pub struct Metadata<'doc> { + pub docid: DocumentId, + pub external_docid: &'doc str, + pub extractor_id: ExtractorId, +} + +pub struct EmbeddingResponse<'doc> { + pub metadata: Metadata<'doc>, + pub embedding: Option, +} + +pub trait OnEmbed<'doc> { + type ErrorMetadata; + + fn process_embedding_response(&mut self, response: EmbeddingResponse<'doc>); + fn process_embedding_error( + &mut self, + error: EmbedError, + embedder_name: &'doc str, + unused_vectors_distribution: &Self::ErrorMetadata, + metadata: BVec<'doc, Metadata<'doc>>, + ) -> crate::Error; +} + +pub struct EmbedSession<'doc, C, I> { + // requests + inputs: BVec<'doc, I>, + metadata: BVec<'doc, Metadata<'doc>>, + + threads: &'doc ThreadPoolNoAbort, + embedder: &'doc Embedder, + + embedder_name: &'doc str, + + embedder_stats: &'doc EmbedderStats, + + on_embed: C, +} + +pub trait Input: Sized { 
+ fn embed_ref( + inputs: &[Self], + embedder: &Embedder, + threads: &ThreadPoolNoAbort, + embedder_stats: &EmbedderStats, + ) -> std::result::Result, EmbedError>; +} + +impl Input for &'_ str { + fn embed_ref( + inputs: &[Self], + embedder: &Embedder, + threads: &ThreadPoolNoAbort, + embedder_stats: &EmbedderStats, + ) -> std::result::Result, EmbedError> { + embedder.embed_index_ref(inputs, threads, embedder_stats) + } +} + +impl Input for Value { + fn embed_ref( + inputs: &[Value], + embedder: &Embedder, + threads: &ThreadPoolNoAbort, + embedder_stats: &EmbedderStats, + ) -> std::result::Result, EmbedError> { + embedder.embed_index_ref_fragments(inputs, threads, embedder_stats) + } +} + +impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> { + #[allow(clippy::too_many_arguments)] + pub fn new( + embedder: &'doc Embedder, + embedder_name: &'doc str, + threads: &'doc ThreadPoolNoAbort, + doc_alloc: &'doc Bump, + embedder_stats: &'doc EmbedderStats, + on_embed: C, + ) -> Self { + let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint(); + let texts = BVec::with_capacity_in(capacity, doc_alloc); + let ids = BVec::with_capacity_in(capacity, doc_alloc); + Self { + inputs: texts, + metadata: ids, + embedder, + threads, + embedder_name, + embedder_stats, + on_embed, + } + } + + pub fn request_embedding( + &mut self, + metadata: Metadata<'doc>, + rendered: I, + unused_vectors_distribution: &C::ErrorMetadata, + ) -> Result<()> { + if self.inputs.len() < self.inputs.capacity() { + self.inputs.push(rendered); + self.metadata.push(metadata); + return Ok(()); + } + + self.embed_chunks(unused_vectors_distribution) + } + + pub fn drain(mut self, unused_vectors_distribution: &C::ErrorMetadata) -> Result { + self.embed_chunks(unused_vectors_distribution)?; + Ok(self.on_embed) + } + + #[allow(clippy::too_many_arguments)] + fn embed_chunks(&mut self, unused_vectors_distribution: &C::ErrorMetadata) -> Result<()> { + if self.inputs.is_empty() { + return Ok(()); + } + let res = match I::embed_ref( + self.inputs.as_slice(), + self.embedder, + self.threads, + self.embedder_stats, + ) { + Ok(embeddings) => { + for (metadata, embedding) in self.metadata.iter().copied().zip(embeddings) { + self.on_embed.process_embedding_response(EmbeddingResponse { + metadata, + embedding: Some(embedding), + }); + } + Ok(()) + } + Err(error) => { + // reset metadata and inputs, and send metadata to the error processing. 
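+ // `std::mem::replace` below moves the collected metadata out of the session so that
+ // `process_embedding_error` can report exactly which documents were in the failed batch,
+ // while a fresh, empty `BVec` from the same bump allocator takes its place.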
+ let doc_alloc = self.metadata.bump(); + let metadata = std::mem::replace( + &mut self.metadata, + BVec::with_capacity_in(self.inputs.capacity(), doc_alloc), + ); + self.inputs.clear(); + return Err(self.on_embed.process_embedding_error( + error, + self.embedder_name, + unused_vectors_distribution, + metadata, + )); + } + }; + self.inputs.clear(); + self.metadata.clear(); + res + } + + pub(crate) fn embedder_name(&self) -> &'doc str { + self.embedder_name + } + + pub(crate) fn doc_alloc(&self) -> &'doc Bump { + self.inputs.bump() + } + + pub(crate) fn on_embed_mut(&mut self) -> &mut C { + &mut self.on_embed + } +} diff --git a/crates/milli/src/vector/settings.rs b/crates/milli/src/vector/settings.rs index 3948ad4d8..1b85dd503 100644 --- a/crates/milli/src/vector/settings.rs +++ b/crates/milli/src/vector/settings.rs @@ -2,6 +2,8 @@ use std::collections::BTreeMap; use std::num::NonZeroUsize; use deserr::Deserr; +use either::Either; +use itertools::Itertools; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use utoipa::ToSchema; @@ -33,6 +35,7 @@ pub struct EmbeddingSettings { /// /// - Defaults to `openAi` pub source: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -55,6 +58,7 @@ pub struct EmbeddingSettings { /// - For source `openAi`, defaults to `text-embedding-3-small` /// - For source `huggingFace`, defaults to `BAAI/bge-base-en-v1.5` pub model: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -75,6 +79,7 @@ pub struct EmbeddingSettings { /// - When `model` is set to default, defaults to `617ca489d9e86b49b8167676d8220688b99db36e` /// - Otherwise, defaults to `null` pub revision: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -96,6 +101,7 @@ pub struct EmbeddingSettings { /// /// - Embedders created before this parameter was available default to `forceMean` to preserve the existing behavior. pub pooling: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -118,6 +124,7 @@ pub struct EmbeddingSettings { /// /// - This setting is partially hidden when returned by the settings pub api_key: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -141,6 +148,7 @@ pub struct EmbeddingSettings { /// - For source `openAi`, the dimensions is the maximum allowed by the model. /// - For sources `ollama` and `rest`, the dimensions are inferred by embedding a sample text. pub dimensions: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -167,6 +175,7 @@ pub struct EmbeddingSettings { /// first enabling it. If you are unsure of whether the performance-relevancy tradeoff is right for you, /// we recommend to use this parameter on a test index first. pub binary_quantized: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -183,6 +192,7 @@ pub struct EmbeddingSettings { /// /// - 🏗️ When modified, embeddings are regenerated for documents whose rendering through the template produces a different text. 
pub document_template: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -201,6 +211,7 @@ pub struct EmbeddingSettings { /// /// - Defaults to 400 pub document_template_max_bytes: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -219,6 +230,36 @@ pub struct EmbeddingSettings { /// - 🌱 When modified for source `openAi`, embeddings are never regenerated /// - 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated pub url: Setting, + + /// Template fragments that will be reassembled and sent to the remote embedder at indexing time. + /// + /// # Availability + /// + /// - This parameter is available for sources `rest`. + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When a fragment is deleted by passing `null` to its name, the corresponding embeddings are removed from documents. + /// - 🏗️ When a fragment is modified, the corresponding embeddings are regenerated if their rendered version changes. + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option>)] + pub indexing_fragments: Setting>>, + + /// Template fragments that will be reassembled and sent to the remote embedder at search time. + /// + /// # Availability + /// + /// - This parameter is available for sources `rest`. + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option>)] + pub search_fragments: Setting>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -236,6 +277,7 @@ pub struct EmbeddingSettings { /// /// - 🏗️ Changing the value of this parameter always regenerates embeddings pub request: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -253,6 +295,7 @@ pub struct EmbeddingSettings { /// /// - 🏗️ Changing the value of this parameter always regenerates embeddings pub response: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option>)] @@ -471,6 +514,36 @@ pub struct SubEmbeddingSettings { /// - 🌱 When modified for source `openAi`, embeddings are never regenerated /// - 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated pub url: Setting, + + /// Template fragments that will be reassembled and sent to the remote embedder at indexing time. + /// + /// # Availability + /// + /// - This parameter is available for sources `rest`. + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When a fragment is deleted by passing `null` to its name, the corresponding embeddings are removed from documents. + /// - 🏗️ When a fragment is modified, the corresponding embeddings are regenerated if their rendered version changes. + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option>)] + pub indexing_fragments: Setting>>, + + /// Template fragments that will be reassembled and sent to the remote embedder at search time. + /// + /// # Availability + /// + /// - This parameter is available for sources `rest`. 
+ /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option>)] + pub search_fragments: Setting>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -542,17 +615,31 @@ pub struct SubEmbeddingSettings { pub indexing_embedder: Setting, } +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum EmbeddingValidationContext { + FullSettings, + SettingsPartialUpdate, +} + /// Indicates what action should take place during a reindexing operation for an embedder -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ReindexAction { /// An indexing operation should take place for this embedder, keeping existing vectors /// and checking whether the document template changed or not RegeneratePrompts, + RegenerateFragments(Vec<(String, RegenerateFragment)>), /// An indexing operation should take place for all documents for this embedder, removing existing vectors /// (except userProvided ones) FullReindex, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum RegenerateFragment { + Update, + Remove, + Add, +} + pub enum SettingsDiff { Remove, Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool }, @@ -565,6 +652,12 @@ pub struct EmbedderAction { pub is_being_quantized: bool, pub write_back: Option, pub reindex: Option, + pub remove_fragments: Option, +} + +#[derive(Debug)] +pub struct RemoveFragments { + pub fragment_ids: Vec, } impl EmbedderAction { @@ -580,6 +673,10 @@ impl EmbedderAction { self.reindex.as_ref() } + pub fn remove_fragments(&self) -> Option<&RemoveFragments> { + self.remove_fragments.as_ref() + } + pub fn with_is_being_quantized(mut self, quantize: bool) -> Self { self.is_being_quantized = quantize; self @@ -591,11 +688,23 @@ impl EmbedderAction { is_being_quantized: false, write_back: Some(write_back), reindex: None, + remove_fragments: None, } } pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self { - Self { was_quantized, is_being_quantized: false, write_back: None, reindex: Some(reindex) } + Self { + was_quantized, + is_being_quantized: false, + write_back: None, + reindex: Some(reindex), + remove_fragments: None, + } + } + + pub fn with_remove_fragments(mut self, remove_fragments: RemoveFragments) -> Self { + self.remove_fragments = Some(remove_fragments); + self } } @@ -622,6 +731,8 @@ impl SettingsDiff { mut dimensions, mut document_template, mut url, + mut indexing_fragments, + mut search_fragments, mut request, mut response, mut search_embedder, @@ -641,6 +752,8 @@ impl SettingsDiff { dimensions: new_dimensions, document_template: new_document_template, url: new_url, + indexing_fragments: new_indexing_fragments, + search_fragments: new_search_fragments, request: new_request, response: new_response, search_embedder: new_search_embedder, @@ -672,6 +785,8 @@ impl SettingsDiff { &mut document_template, &mut document_template_max_bytes, &mut url, + &mut indexing_fragments, + &mut search_fragments, &mut request, &mut response, &mut headers, @@ -684,6 +799,8 @@ impl SettingsDiff { new_document_template, new_document_template_max_bytes, new_url, + new_indexing_fragments, + new_search_fragments, new_request, new_response, new_headers, @@ -710,6 +827,8 @@ impl SettingsDiff { dimensions, document_template, url, + indexing_fragments, + 
search_fragments, request, response, search_embedder, @@ -757,6 +876,8 @@ impl SettingsDiff { mut document_template, mut document_template_max_bytes, mut url, + mut indexing_fragments, + mut search_fragments, mut request, mut response, mut headers, @@ -782,6 +903,8 @@ impl SettingsDiff { document_template: new_document_template, document_template_max_bytes: new_document_template_max_bytes, url: new_url, + indexing_fragments: new_indexing_fragments, + search_fragments: new_search_fragments, request: new_request, response: new_response, headers: new_headers, @@ -802,6 +925,8 @@ impl SettingsDiff { &mut document_template, &mut document_template_max_bytes, &mut url, + &mut indexing_fragments, + &mut search_fragments, &mut request, &mut response, &mut headers, @@ -814,6 +939,8 @@ impl SettingsDiff { new_document_template, new_document_template_max_bytes, new_url, + new_indexing_fragments, + new_search_fragments, new_request, new_response, new_headers, @@ -834,6 +961,8 @@ impl SettingsDiff { dimensions, document_template, url, + indexing_fragments, + search_fragments, request, response, headers, @@ -863,6 +992,8 @@ impl SettingsDiff { document_template: &mut Setting, document_template_max_bytes: &mut Setting, url: &mut Setting, + indexing_fragments: &mut Setting>>, + search_fragments: &mut Setting>>, request: &mut Setting, response: &mut Setting, headers: &mut Setting>, @@ -875,6 +1006,8 @@ impl SettingsDiff { new_document_template: Setting, new_document_template_max_bytes: Setting, new_url: Setting, + new_indexing_fragments: Setting>>, + new_search_fragments: Setting>>, new_request: Setting, new_response: Setting, new_headers: Setting>, @@ -890,6 +1023,8 @@ impl SettingsDiff { pooling, dimensions, url, + indexing_fragments, + search_fragments, request, response, document_template, @@ -929,6 +1064,105 @@ impl SettingsDiff { } } } + + *search_fragments = match (std::mem::take(search_fragments), new_search_fragments) { + (Setting::Set(search_fragments), Setting::Set(new_search_fragments)) => { + Setting::Set( + search_fragments + .into_iter() + .merge_join_by(new_search_fragments, |(left, _), (right, _)| { + left.cmp(right) + }) + .map(|eob| { + match eob { + // merge fragments + itertools::EitherOrBoth::Both((name, _), (_, right)) => { + (name, right) + } + // unchanged fragment + itertools::EitherOrBoth::Left(left) => left, + // new fragment + itertools::EitherOrBoth::Right(right) => right, + } + }) + .collect(), + ) + } + (_, Setting::Reset) => Setting::Reset, + (left, Setting::NotSet) => left, + (Setting::NotSet | Setting::Reset, Setting::Set(new_search_fragments)) => { + Setting::Set(new_search_fragments) + } + }; + + let mut regenerate_fragments = Vec::new(); + *indexing_fragments = match (std::mem::take(indexing_fragments), new_indexing_fragments) { + (Setting::Set(fragments), Setting::Set(new_fragments)) => { + Setting::Set( + fragments + .into_iter() + .merge_join_by(new_fragments, |(left, _), (right, _)| left.cmp(right)) + .map(|eob| { + match eob { + // merge fragments + itertools::EitherOrBoth::Both( + (name, left), + (other_name, right), + ) => { + if left == right { + (name, left) + } else { + match right { + Some(right) => { + regenerate_fragments + .push((other_name, RegenerateFragment::Update)); + (name, Some(right)) + } + None => { + regenerate_fragments + .push((other_name, RegenerateFragment::Remove)); + (name, None) + } + } + } + } + // unchanged fragment + itertools::EitherOrBoth::Left(left) => left, + // new fragment + itertools::EitherOrBoth::Right((name, right)) => { + if 
right.is_some() { + regenerate_fragments + .push((name.clone(), RegenerateFragment::Add)); + } + (name, right) + } + } + }) + .collect(), + ) + } + // remove all fragments => move to document template + (_, Setting::Reset) => { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + Setting::Reset + } + // add all fragments + (Setting::NotSet | Setting::Reset, Setting::Set(new_fragments)) => { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + + Setting::Set(new_fragments) + } + // no change + (left, Setting::NotSet) => left, + }; + if !regenerate_fragments.is_empty() { + regenerate_fragments.sort_unstable_by(|(left, _), (right, _)| left.cmp(right)); + ReindexAction::push_action( + reindex_action, + ReindexAction::RegenerateFragments(regenerate_fragments), + ); + } + if request.apply(new_request) { ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); } @@ -960,10 +1194,16 @@ impl SettingsDiff { impl ReindexAction { fn push_action(this: &mut Option, other: Self) { - *this = match (*this, other) { - (_, ReindexAction::FullReindex) => Some(ReindexAction::FullReindex), - (Some(ReindexAction::FullReindex), _) => Some(ReindexAction::FullReindex), - (_, ReindexAction::RegeneratePrompts) => Some(ReindexAction::RegeneratePrompts), + use ReindexAction::*; + *this = match (this.take(), other) { + (_, FullReindex) => Some(FullReindex), + (Some(FullReindex), _) => Some(FullReindex), + (_, RegenerateFragments(fragments)) => Some(RegenerateFragments(fragments)), + (Some(RegenerateFragments(fragments)), RegeneratePrompts) => { + Some(RegenerateFragments(fragments)) + } + (Some(RegeneratePrompts), RegeneratePrompts) => Some(RegeneratePrompts), + (None, RegeneratePrompts) => Some(RegeneratePrompts), } } } @@ -976,6 +1216,8 @@ fn apply_default_for_source( pooling: &mut Setting, dimensions: &mut Setting, url: &mut Setting, + indexing_fragments: &mut Setting>>, + search_fragments: &mut Setting>>, request: &mut Setting, response: &mut Setting, document_template: &mut Setting, @@ -991,6 +1233,8 @@ fn apply_default_for_source( *pooling = Setting::Reset; *dimensions = Setting::NotSet; *url = Setting::NotSet; + *indexing_fragments = Setting::NotSet; + *search_fragments = Setting::NotSet; *request = Setting::NotSet; *response = Setting::NotSet; *headers = Setting::NotSet; @@ -1003,6 +1247,8 @@ fn apply_default_for_source( *pooling = Setting::NotSet; *dimensions = Setting::Reset; *url = Setting::NotSet; + *indexing_fragments = Setting::NotSet; + *search_fragments = Setting::NotSet; *request = Setting::NotSet; *response = Setting::NotSet; *headers = Setting::NotSet; @@ -1015,6 +1261,8 @@ fn apply_default_for_source( *pooling = Setting::NotSet; *dimensions = Setting::NotSet; *url = Setting::Reset; + *indexing_fragments = Setting::NotSet; + *search_fragments = Setting::NotSet; *request = Setting::NotSet; *response = Setting::NotSet; *headers = Setting::NotSet; @@ -1027,6 +1275,8 @@ fn apply_default_for_source( *pooling = Setting::NotSet; *dimensions = Setting::Reset; *url = Setting::Reset; + *indexing_fragments = Setting::Reset; + *search_fragments = Setting::Reset; *request = Setting::Reset; *response = Setting::Reset; *headers = Setting::Reset; @@ -1039,6 +1289,8 @@ fn apply_default_for_source( *pooling = Setting::NotSet; *dimensions = Setting::Reset; *url = Setting::NotSet; + *indexing_fragments = Setting::NotSet; + *search_fragments = Setting::NotSet; *request = Setting::NotSet; *response = Setting::NotSet; *document_template = Setting::NotSet; @@ 
-1053,6 +1305,8 @@ fn apply_default_for_source( *pooling = Setting::NotSet; *dimensions = Setting::NotSet; *url = Setting::NotSet; + *indexing_fragments = Setting::NotSet; + *search_fragments = Setting::NotSet; *request = Setting::NotSet; *response = Setting::NotSet; *document_template = Setting::NotSet; @@ -1119,6 +1373,8 @@ pub enum MetaEmbeddingSetting { DocumentTemplate, DocumentTemplateMaxBytes, Url, + IndexingFragments, + SearchFragments, Request, Response, Headers, @@ -1141,6 +1397,8 @@ impl MetaEmbeddingSetting { DocumentTemplate => "documentTemplate", DocumentTemplateMaxBytes => "documentTemplateMaxBytes", Url => "url", + IndexingFragments => "indexingFragments", + SearchFragments => "searchFragments", Request => "request", Response => "response", Headers => "headers", @@ -1164,6 +1422,8 @@ impl EmbeddingSettings { dimensions: &Setting, api_key: &Setting, url: &Setting, + indexing_fragments: &Setting>>, + search_fragments: &Setting>>, request: &Setting, response: &Setting, document_template: &Setting, @@ -1198,6 +1458,20 @@ impl EmbeddingSettings { )?; Self::check_setting(embedder_name, source, MetaEmbeddingSetting::ApiKey, context, api_key)?; Self::check_setting(embedder_name, source, MetaEmbeddingSetting::Url, context, url)?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::IndexingFragments, + context, + indexing_fragments, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::SearchFragments, + context, + search_fragments, + )?; Self::check_setting( embedder_name, source, @@ -1336,8 +1610,8 @@ impl EmbeddingSettings { ) => FieldStatus::Allowed, ( OpenAi, - Revision | Pooling | Request | Response | Headers | SearchEmbedder - | IndexingEmbedder, + Revision | Pooling | IndexingFragments | SearchFragments | Request | Response + | Headers | SearchEmbedder | IndexingEmbedder, _, ) => FieldStatus::Disallowed, ( @@ -1347,8 +1621,8 @@ impl EmbeddingSettings { ) => FieldStatus::Allowed, ( HuggingFace, - ApiKey | Dimensions | Url | Request | Response | Headers | SearchEmbedder - | IndexingEmbedder, + ApiKey | Dimensions | Url | IndexingFragments | SearchFragments | Request + | Response | Headers | SearchEmbedder | IndexingEmbedder, _, ) => FieldStatus::Disallowed, (Ollama, Model, _) => FieldStatus::Mandatory, @@ -1359,8 +1633,8 @@ impl EmbeddingSettings { ) => FieldStatus::Allowed, ( Ollama, - Revision | Pooling | Request | Response | Headers | SearchEmbedder - | IndexingEmbedder, + Revision | Pooling | IndexingFragments | SearchFragments | Request | Response + | Headers | SearchEmbedder | IndexingEmbedder, _, ) => FieldStatus::Disallowed, (UserProvided, Dimensions, _) => FieldStatus::Mandatory, @@ -1374,6 +1648,8 @@ impl EmbeddingSettings { | DocumentTemplate | DocumentTemplateMaxBytes | Url + | IndexingFragments + | SearchFragments | Request | Response | Headers @@ -1392,6 +1668,10 @@ impl EmbeddingSettings { | Headers, _, ) => FieldStatus::Allowed, + (Rest, IndexingFragments, NotNested | Indexing) => FieldStatus::Allowed, + (Rest, IndexingFragments, Search) => FieldStatus::Disallowed, + (Rest, SearchFragments, NotNested | Search) => FieldStatus::Allowed, + (Rest, SearchFragments, Indexing) => FieldStatus::Disallowed, (Rest, Model | Revision | Pooling | SearchEmbedder | IndexingEmbedder, _) => { FieldStatus::Disallowed } @@ -1407,6 +1687,8 @@ impl EmbeddingSettings { | DocumentTemplate | DocumentTemplateMaxBytes | Url + | IndexingFragments + | SearchFragments | Request | Response | Headers, @@ -1500,6 +1782,11 @@ impl std::fmt::Display 
for EmbedderSource { } } +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] +pub struct Fragment { + pub value: serde_json::Value, +} + impl EmbeddingSettings { fn from_hugging_face( super::hf::EmbedderOptions { @@ -1522,6 +1809,8 @@ impl EmbeddingSettings { document_template, document_template_max_bytes, url: Setting::NotSet, + indexing_fragments: Setting::NotSet, + search_fragments: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, @@ -1554,6 +1843,8 @@ impl EmbeddingSettings { document_template, document_template_max_bytes, url: Setting::some_or_not_set(url), + indexing_fragments: Setting::NotSet, + search_fragments: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, @@ -1586,6 +1877,8 @@ impl EmbeddingSettings { document_template, document_template_max_bytes, url: Setting::some_or_not_set(url), + indexing_fragments: Setting::NotSet, + search_fragments: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, @@ -1610,6 +1903,8 @@ impl EmbeddingSettings { document_template: Setting::NotSet, document_template_max_bytes: Setting::NotSet, url: Setting::NotSet, + indexing_fragments: Setting::NotSet, + search_fragments: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, @@ -1626,6 +1921,8 @@ impl EmbeddingSettings { dimensions, url, request, + indexing_fragments, + search_fragments, response, distribution, headers, @@ -1641,9 +1938,39 @@ impl EmbeddingSettings { pooling: Setting::NotSet, api_key: Setting::some_or_not_set(api_key), dimensions: Setting::some_or_not_set(dimensions), - document_template, - document_template_max_bytes, + document_template: if indexing_fragments.is_empty() && search_fragments.is_empty() { + document_template + } else { + Setting::NotSet + }, + document_template_max_bytes: if indexing_fragments.is_empty() + && search_fragments.is_empty() + { + document_template_max_bytes + } else { + Setting::NotSet + }, url: Setting::Set(url), + indexing_fragments: if indexing_fragments.is_empty() { + Setting::NotSet + } else { + Setting::Set( + indexing_fragments + .into_iter() + .map(|(name, fragment)| (name, Some(Fragment { value: fragment }))) + .collect(), + ) + }, + search_fragments: if search_fragments.is_empty() { + Setting::NotSet + } else { + Setting::Set( + search_fragments + .into_iter() + .map(|(name, fragment)| (name, Some(Fragment { value: fragment }))) + .collect(), + ) + }, request: Setting::Set(request), response: Setting::Set(response), distribution: Setting::some_or_not_set(distribution), @@ -1702,6 +2029,8 @@ impl From for EmbeddingSettings { document_template: Setting::NotSet, document_template_max_bytes: Setting::NotSet, url: Setting::NotSet, + indexing_fragments: Setting::NotSet, + search_fragments: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, @@ -1774,6 +2103,8 @@ impl From for SubEmbeddingSettings { document_template, document_template_max_bytes, url, + indexing_fragments, + search_fragments, request, response, headers, @@ -1792,6 +2123,8 @@ impl From for SubEmbeddingSettings { document_template, document_template_max_bytes, url, + indexing_fragments, + search_fragments, request, response, headers, @@ -1816,6 +2149,8 @@ impl From for EmbeddingConfig { document_template, document_template_max_bytes, url, + indexing_fragments, + search_fragments, request, response, distribution, @@ -1867,6 
+2202,8 @@ impl From for EmbeddingConfig { EmbedderSource::Rest => SubEmbedderOptions::rest( url.set().unwrap(), api_key, + indexing_fragments, + search_fragments, request.set().unwrap(), response.set().unwrap(), headers, @@ -1910,6 +2247,8 @@ impl SubEmbedderOptions { document_template: _, document_template_max_bytes: _, url, + indexing_fragments, + search_fragments, request, response, headers, @@ -1932,6 +2271,8 @@ impl SubEmbedderOptions { EmbedderSource::Rest => Self::rest( url.set().unwrap(), api_key, + indexing_fragments, + search_fragments, request.set().unwrap(), response.set().unwrap(), headers, @@ -1998,9 +2339,13 @@ impl SubEmbedderOptions { distribution: distribution.set(), }) } + + #[allow(clippy::too_many_arguments)] fn rest( url: String, api_key: Setting, + indexing_fragments: Setting>>, + search_fragments: Setting>>, request: serde_json::Value, response: serde_json::Value, headers: Setting>, @@ -2015,6 +2360,22 @@ impl SubEmbedderOptions { response, distribution: distribution.set(), headers: headers.set().unwrap_or_default(), + search_fragments: search_fragments + .set() + .unwrap_or_default() + .into_iter() + .filter_map(|(name, fragment)| { + Some((name, fragment.map(|fragment| fragment.value)?)) + }) + .collect(), + indexing_fragments: indexing_fragments + .set() + .unwrap_or_default() + .into_iter() + .filter_map(|(name, fragment)| { + Some((name, fragment.map(|fragment| fragment.value)?)) + }) + .collect(), }) } fn ollama( @@ -2054,3 +2415,29 @@ impl From for EmbedderOptions { } } } + +pub(crate) fn fragments_from_settings( + setting: &Setting, +) -> impl Iterator + '_ { + let Some(setting) = setting.as_ref().set() else { return Either::Left(None.into_iter()) }; + + let filter_map = |(name, fragment): (&String, &Option)| { + if fragment.is_some() { + Some(name.clone()) + } else { + None + } + }; + + if let Some(setting) = setting.indexing_fragments.as_ref().set() { + Either::Right(setting.iter().filter_map(filter_map)) + } else { + let Some(setting) = setting.indexing_embedder.as_ref().set() else { + return Either::Left(None.into_iter()); + }; + let Some(setting) = setting.indexing_fragments.as_ref().set() else { + return Either::Left(None.into_iter()); + }; + Either::Right(setting.iter().filter_map(filter_map)) + } +} diff --git a/crates/milli/tests/search/distinct.rs b/crates/milli/tests/search/distinct.rs index fc890dfe8..c7fa9befa 100644 --- a/crates/milli/tests/search/distinct.rs +++ b/crates/milli/tests/search/distinct.rs @@ -1,6 +1,7 @@ use std::collections::HashSet; use big_s::S; +use milli::progress::Progress; use milli::update::Settings; use milli::{Criterion, Search, SearchResult, TermsMatchingStrategy}; use Criterion::*; @@ -19,7 +20,7 @@ macro_rules! 
test_distinct { let config = milli::update::IndexerConfig::default(); let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_distinct_field(S(stringify!($distinct))); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); diff --git a/crates/milli/tests/search/facet_distribution.rs b/crates/milli/tests/search/facet_distribution.rs index 8934cbea4..cc1b85369 100644 --- a/crates/milli/tests/search/facet_distribution.rs +++ b/crates/milli/tests/search/facet_distribution.rs @@ -5,7 +5,7 @@ use milli::documents::mmap_from_objects; use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; -use milli::vector::EmbeddingConfigs; +use milli::vector::RuntimeEmbedders; use milli::{FacetDistribution, FilterableAttributesRule, Index, Object, OrderBy}; use serde_json::{from_value, json}; @@ -25,7 +25,7 @@ fn test_facet_distribution_with_no_facet_values() { FilterableAttributesRule::Field(S("genres")), FilterableAttributesRule::Field(S("tags")), ]); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); // index documents @@ -35,7 +35,7 @@ fn test_facet_distribution_with_no_facet_values() { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); let doc1: Object = from_value( @@ -74,6 +74,7 @@ fn test_facet_distribution_with_no_facet_values() { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs index 906956716..fa03f1cc1 100644 --- a/crates/milli/tests/search/mod.rs +++ b/crates/milli/tests/search/mod.rs @@ -10,7 +10,7 @@ use maplit::{btreemap, hashset}; use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; -use milli::vector::EmbeddingConfigs; +use milli::vector::RuntimeEmbedders; use milli::{ AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member, TermsMatchingStrategy, }; @@ -63,7 +63,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("america") => vec![S("the united states")], }); builder.set_searchable_fields(vec![S("title"), S("description")]); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); // index documents @@ -74,7 +74,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); let mut file = tempfile::tempfile().unwrap(); @@ -114,6 +114,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); diff --git a/crates/milli/tests/search/phrase_search.rs b/crates/milli/tests/search/phrase_search.rs index b7f792bfc..397729c20 100644 --- a/crates/milli/tests/search/phrase_search.rs +++ 
b/crates/milli/tests/search/phrase_search.rs @@ -1,3 +1,4 @@ +use milli::progress::Progress; use milli::update::{IndexerConfig, Settings}; use milli::{Criterion, Index, Search, TermsMatchingStrategy}; @@ -10,7 +11,7 @@ fn set_stop_words(index: &Index, stop_words: &[&str]) { let mut builder = Settings::new(&mut wtxn, index, &config); let stop_words = stop_words.iter().map(|s| s.to_string()).collect(); builder.set_stop_words(stop_words); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); } diff --git a/crates/milli/tests/search/query_criteria.rs b/crates/milli/tests/search/query_criteria.rs index 1acc89484..3f8134085 100644 --- a/crates/milli/tests/search/query_criteria.rs +++ b/crates/milli/tests/search/query_criteria.rs @@ -8,7 +8,7 @@ use maplit::hashset; use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; -use milli::vector::EmbeddingConfigs; +use milli::vector::RuntimeEmbedders; use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy}; use rand::Rng; use Criterion::*; @@ -236,7 +236,7 @@ fn criteria_mixup() { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_criteria(criteria.clone()); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); @@ -276,7 +276,7 @@ fn criteria_ascdesc() { S("name"), S("age"), }); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); let mut wtxn = index.write_txn().unwrap(); @@ -288,7 +288,7 @@ fn criteria_ascdesc() { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); let mut file = tempfile::tempfile().unwrap(); @@ -344,6 +344,7 @@ fn criteria_ascdesc() { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -358,7 +359,7 @@ fn criteria_ascdesc() { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_criteria(vec![criterion.clone()]); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); diff --git a/crates/milli/tests/search/typo_tolerance.rs b/crates/milli/tests/search/typo_tolerance.rs index 3c0717063..95ff85165 100644 --- a/crates/milli/tests/search/typo_tolerance.rs +++ b/crates/milli/tests/search/typo_tolerance.rs @@ -6,7 +6,7 @@ use milli::documents::mmap_from_objects; use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; -use milli::vector::EmbeddingConfigs; +use milli::vector::RuntimeEmbedders; use milli::{Criterion, Index, Object, Search, TermsMatchingStrategy}; use serde_json::from_value; use tempfile::tempdir; @@ -46,7 +46,7 @@ fn test_typo_tolerance_one_typo() { let config = IndexerConfig::default(); let mut builder = Settings::new(&mut txn, &index, &config); builder.set_min_word_len_one_typo(4); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, 
&Progress::default(), Default::default()).unwrap(); // typo is now supported for 4 letters words let mut search = Search::new(&txn, &index); @@ -92,7 +92,7 @@ fn test_typo_tolerance_two_typo() { let config = IndexerConfig::default(); let mut builder = Settings::new(&mut txn, &index, &config); builder.set_min_word_len_two_typos(7); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); // typo is now supported for 4 letters words let mut search = Search::new(&txn, &index); @@ -123,7 +123,7 @@ fn test_typo_disabled_on_word() { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let embedders = EmbeddingConfigs::default(); + let embedders = RuntimeEmbedders::default(); let mut indexer = indexer::DocumentOperation::new(); indexer.replace_documents(&documents).unwrap(); @@ -153,6 +153,7 @@ fn test_typo_disabled_on_word() { embedders, &|| false, &Progress::default(), + &Default::default(), ) .unwrap(); @@ -180,7 +181,7 @@ fn test_typo_disabled_on_word() { // `zealand` doesn't allow typos anymore exact_words.insert("zealand".to_string()); builder.set_exact_words(exact_words); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); let mut search = Search::new(&txn, &index); search.query("zealand"); @@ -218,7 +219,7 @@ fn test_disable_typo_on_attribute() { let mut builder = Settings::new(&mut txn, &index, &config); // disable typos on `description` builder.set_exact_attributes(vec!["description".to_string()].into_iter().collect()); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); let mut search = Search::new(&txn, &index); search.query("antebelum"); diff --git a/crates/tracing-trace/Cargo.toml b/crates/tracing-trace/Cargo.toml index 2cd4f7a74..866a982a0 100644 --- a/crates/tracing-trace/Cargo.toml +++ b/crates/tracing-trace/Cargo.toml @@ -6,10 +6,10 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -color-spantrace = "0.2.1" +color-spantrace = "0.3.0" fxprof-processed-profile = "0.7.0" -serde = { version = "1.0.217", features = ["derive"] } -serde_json = "1.0.135" +serde = { version = "1.0.219", features = ["derive"] } +serde_json = "1.0.140" tracing = "0.1.41" tracing-error = "0.2.1" tracing-subscriber = "0.3.19" @@ -18,7 +18,7 @@ byte-unit = { version = "5.1.6", default-features = false, features = [ "byte", "serde", ] } -tokio = { version = "1.43.1", features = ["sync"] } +tokio = { version = "1.45.1", features = ["sync"] } [target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies] libproc = "0.14.10" diff --git a/crates/xtask/Cargo.toml b/crates/xtask/Cargo.toml index a9ef79cd6..5fdf157df 100644 --- a/crates/xtask/Cargo.toml +++ b/crates/xtask/Cargo.toml @@ -11,27 +11,27 @@ license.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -anyhow = "1.0.95" +anyhow = "1.0.98" build-info = { version = "1.7.0", path = "../build-info" } -cargo_metadata = "0.19.1" -clap = { version = "4.5.24", features = ["derive"] } +cargo_metadata = "0.20.0" +clap = { version = "4.5.40", features = ["derive"] } futures-core = "0.3.31" futures-util = "0.3.31" -reqwest = { version = "0.12.12", features = [ +reqwest = { version = "0.12.20", features = [ "stream", 
"json", "rustls-tls", ], default-features = false } -serde = { version = "1.0.217", features = ["derive"] } -serde_json = "1.0.135" -sha2 = "0.10.8" -sysinfo = "0.33.1" -time = { version = "0.3.37", features = [ +serde = { version = "1.0.219", features = ["derive"] } +serde_json = "1.0.140" +sha2 = "0.10.9" +sysinfo = "0.35.2" +time = { version = "0.3.41", features = [ "serde", "serde-human-readable", "macros", ] } -tokio = { version = "1.43.1", features = [ +tokio = { version = "1.45.1", features = [ "rt", "net", "time", @@ -41,4 +41,4 @@ tokio = { version = "1.43.1", features = [ tracing = "0.1.41" tracing-subscriber = "0.3.19" tracing-trace = { version = "0.1.0", path = "../tracing-trace" } -uuid = { version = "1.11.0", features = ["v7", "serde"] } +uuid = { version = "1.17.0", features = ["v7", "serde"] }