Compare commits


1 commit

Author: ManyTheFish
SHA1: db2f22df25
Message: Add a check ensuring there is no modification in the fid dbs
Date: 2025-03-12 11:04:34 +01:00
120 changed files with 1017 additions and 2875 deletions


@@ -6,7 +6,11 @@ on:
     # Everyday at 5:00am
     - cron: "0 5 * * *"
   pull_request:
-  merge_group:
+  push:
+    # trying and staging branches are for Bors config
+    branches:
+      - trying
+      - staging
 env:
   CARGO_TERM_COLOR: always


@@ -150,7 +150,7 @@ Some notes on GitHub PRs:
 - The PR title should be accurate and descriptive of the changes.
 - [Convert your PR as a draft](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request) if your changes are a work in progress: no one will review it until you pass your PR as ready for review.<br>
   The draft PRs are recommended when you want to show that you are working on something and make your work visible.
-- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.
+- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [Bors](https://github.com/bors-ng/bors-ng) to automatically enforce this requirement without the PR author having to rebase manually.
 ## Release Process (for internal team only)
@@ -158,7 +158,8 @@ Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org
 ### Automation to rebase and Merge the PRs
-This project uses GitHub Merge Queues that helps us manage pull requests merging.
+This project integrates a bot that helps us manage pull requests merging.<br>
+_[Read more about this](https://github.com/meilisearch/integration-guides/blob/main/resources/bors.md)._
 ### How to Publish a new Release

Cargo.lock (generated)

@@ -47,7 +47,7 @@ dependencies = [
"actix-utils", "actix-utils",
"ahash 0.8.11", "ahash 0.8.11",
"base64 0.22.1", "base64 0.22.1",
"bitflags 2.9.0", "bitflags 2.6.0",
"brotli", "brotli",
"bytes", "bytes",
"bytestring", "bytestring",
@@ -258,7 +258,7 @@ version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9"
dependencies = [ dependencies = [
"getrandom 0.2.15", "getrandom",
"once_cell", "once_cell",
"version_check", "version_check",
] ]
@@ -271,7 +271,7 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"const-random", "const-random",
"getrandom 0.2.15", "getrandom",
"once_cell", "once_cell",
"version_check", "version_check",
"zerocopy", "zerocopy",
@@ -393,24 +393,41 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]] [[package]]
name = "arroy" name = "arroy"
version = "0.6.1" version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08e6111f351d004bd13e95ab540721272136fd3218b39d3ec95a2ea1c4e6a0a6" checksum = "dfc5f272f38fa063bbff0a7ab5219404e221493de005e2b4078c62d626ef567e"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"byteorder", "byteorder",
"enum-iterator",
"heed", "heed",
"log",
"memmap2", "memmap2",
"nohash", "nohash",
"ordered-float", "ordered-float",
"page_size",
"rand", "rand",
"rayon", "rayon",
"roaring", "roaring",
"tempfile", "tempfile",
"thiserror 2.0.9", "thiserror 1.0.69",
"tracing", ]
[[package]]
name = "arroy"
version = "0.5.0"
source = "git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05#053807bf38dc079f25b003f19fc30fbf3613f6e7"
dependencies = [
"bytemuck",
"byteorder",
"heed",
"log",
"memmap2",
"nohash",
"ordered-float",
"rand",
"rayon",
"roaring",
"tempfile",
"thiserror 1.0.69",
] ]
[[package]] [[package]]
@@ -486,7 +503,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
[[package]] [[package]]
name = "benchmarks" name = "benchmarks"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bumpalo", "bumpalo",
@@ -536,7 +553,7 @@ version = "0.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
dependencies = [ dependencies = [
"bitflags 2.9.0", "bitflags 2.6.0",
"cexpr", "cexpr",
"clang-sys", "clang-sys",
"itertools 0.13.0", "itertools 0.13.0",
@@ -582,9 +599,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.9.0" version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
dependencies = [ dependencies = [
"serde", "serde",
] ]
@@ -677,7 +694,7 @@ dependencies = [
[[package]] [[package]]
name = "build-info" name = "build-info"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"time", "time",
@@ -790,20 +807,22 @@ dependencies = [
[[package]] [[package]]
name = "bzip2" name = "bzip2"
version = "0.5.2" version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8"
dependencies = [ dependencies = [
"bzip2-sys", "bzip2-sys",
"libc",
] ]
[[package]] [[package]]
name = "bzip2-sys" name = "bzip2-sys"
version = "0.1.13+1.0.8" version = "0.1.11+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
dependencies = [ dependencies = [
"cc", "cc",
"libc",
"pkg-config", "pkg-config",
] ]
@@ -925,13 +944,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.16" version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" checksum = "74b6a57f98764a267ff415d50a25e6e166f3831a5071af4995296ea97d210490"
dependencies = [ dependencies = [
"jobserver", "jobserver",
"libc", "libc",
"shlex", "once_cell",
] ]
[[package]] [[package]]
@@ -976,9 +995,9 @@ dependencies = [
[[package]] [[package]]
name = "charabia" name = "charabia"
version = "0.9.3" version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650d52f87a36472ea1c803dee49d6bfd23d426efa9363e2f4c4a0e6a236d3407" checksum = "cf8921fe4d53ab8f9e8f9b72ce6f91726cfc40fffab1243d27db406b5e2e9cc2"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"csv", "csv",
@@ -1141,7 +1160,7 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
dependencies = [ dependencies = [
"getrandom 0.2.15", "getrandom",
"once_cell", "once_cell",
"tiny-keccak", "tiny-keccak",
] ]
@@ -1652,7 +1671,7 @@ dependencies = [
[[package]] [[package]]
name = "dump" name = "dump"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"big_s", "big_s",
@@ -1854,7 +1873,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]] [[package]]
name = "file-store" name = "file-store"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"tempfile", "tempfile",
"thiserror 2.0.9", "thiserror 2.0.9",
@@ -1876,7 +1895,7 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"insta", "insta",
"nom", "nom",
@@ -1896,7 +1915,7 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@@ -2035,7 +2054,7 @@ dependencies = [
[[package]] [[package]]
name = "fuzzers" name = "fuzzers"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"arbitrary", "arbitrary",
"bumpalo", "bumpalo",
@@ -2063,7 +2082,7 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce20bbb48248608ba4908b45fe36e17e40f56f8c6bb385ecf5d3c4a1e8b05a22" checksum = "ce20bbb48248608ba4908b45fe36e17e40f56f8c6bb385ecf5d3c4a1e8b05a22"
dependencies = [ dependencies = [
"bitflags 2.9.0", "bitflags 2.6.0",
"debugid", "debugid",
"fxhash", "fxhash",
"serde", "serde",
@@ -2214,24 +2233,10 @@ dependencies = [
"cfg-if", "cfg-if",
"js-sys", "js-sys",
"libc", "libc",
"wasi 0.11.0+wasi-snapshot-preview1", "wasi",
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "getrandom"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi 0.13.3+wasi-0.2.2",
"wasm-bindgen",
"windows-targets 0.52.6",
]
[[package]] [[package]]
name = "gimli" name = "gimli"
version = "0.27.3" version = "0.27.3"
@@ -2244,7 +2249,7 @@ version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b903b73e45dc0c6c596f2d37eccece7c1c8bb6e4407b001096387c63d0d93724" checksum = "b903b73e45dc0c6c596f2d37eccece7c1c8bb6e4407b001096387c63d0d93724"
dependencies = [ dependencies = [
"bitflags 2.9.0", "bitflags 2.6.0",
"libc", "libc",
"libgit2-sys", "libgit2-sys",
"log", "log",
@@ -2392,11 +2397,11 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]] [[package]]
name = "heed" name = "heed"
version = "0.22.0" version = "0.20.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393" checksum = "7d4f449bab7320c56003d37732a917e18798e2f1709d80263face2b4f9436ddb"
dependencies = [ dependencies = [
"bitflags 2.9.0", "bitflags 2.6.0",
"byteorder", "byteorder",
"heed-traits", "heed-traits",
"heed-types", "heed-types",
@@ -2416,9 +2421,9 @@ checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff"
[[package]] [[package]]
name = "heed-types" name = "heed-types"
version = "0.21.0" version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c255bdf46e07fb840d120a36dcc81f385140d7191c76a7391672675c01a55d" checksum = "9d3f528b053a6d700b2734eabcd0fd49cb8230647aa72958467527b0b7917114"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -2738,14 +2743,14 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]] [[package]]
name = "index-scheduler" name = "index-scheduler"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"big_s", "big_s",
"bincode", "bincode",
"bumpalo", "bumpalo",
"bumparaw-collections", "bumparaw-collections",
"byte-unit",
"convert_case 0.6.0", "convert_case 0.6.0",
"crossbeam-channel", "crossbeam-channel",
"csv", "csv",
@@ -2754,7 +2759,6 @@ dependencies = [
"enum-iterator", "enum-iterator",
"file-store", "file-store",
"flate2", "flate2",
"indexmap",
"insta", "insta",
"maplit", "maplit",
"meili-snap", "meili-snap",
@@ -2937,17 +2941,16 @@ dependencies = [
[[package]] [[package]]
name = "js-sys" name = "js-sys"
version = "0.3.77" version = "0.3.69"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
dependencies = [ dependencies = [
"once_cell",
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@@ -3010,9 +3013,9 @@ dependencies = [
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.171" version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]] [[package]]
name = "libgit2-sys" name = "libgit2-sys"
@@ -3077,9 +3080,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera" name = "lindera"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "832c220475557e3b44a46cad1862b57f010f0c6e93d771d0e628e08689c068b1" checksum = "c6cbc1aad631a7da0a7e9bc4b8669fa92ac9ca8eeb7b35a807376dd3034443ff"
dependencies = [ dependencies = [
"lindera-analyzer", "lindera-analyzer",
"lindera-core", "lindera-core",
@@ -3090,9 +3093,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-analyzer" name = "lindera-analyzer"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8e26651714abf5167e6b6a80f5cdaa0cad41c5fcb84d8ba96bebafcb9029339" checksum = "74508ffbb24e36905d1718b261460e378a748029b07bcd7e06f0d18500b8194c"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@@ -3120,9 +3123,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-assets" name = "lindera-assets"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebb01f1ca53c1e642234c6c7fdb9ac664ad0c1ab9502f33e4200201bac7e6ce7" checksum = "6a677c371ecb3bd02b751be306ea09876cd47cf426303ad5f10a3fd6f9a4ded6"
dependencies = [ dependencies = [
"encoding", "encoding",
"flate2", "flate2",
@@ -3133,9 +3136,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-cc-cedict" name = "lindera-cc-cedict"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f7618d9aa947fdd7c38eae2b79f0fd237ecb5067608f1363610ba20d20ab5a8" checksum = "c35944000d05a177e981f037b5f0805f283b32f05a0c35713003bef136ca8cb4"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -3147,9 +3150,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-cc-cedict-builder" name = "lindera-cc-cedict-builder"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efdbcb809d81428935d601a78c94bfb39500749213f7320705f427a7a1d31aec" checksum = "85b8f642bc9c9130682569975772a17336c6aab26d11fc0f823f3e663167ace6"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"lindera-core", "lindera-core",
@@ -3159,9 +3162,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-compress" name = "lindera-compress"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eac178afa2456dac469d3b1a2d7fbaf3e1ea796a1f52321e8ac29545a53c239c" checksum = "a7825d8d63592aa5727d67bd209170ac82df56c369533efbf0ddbac277bb68ec"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"flate2", "flate2",
@@ -3170,9 +3173,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-core" name = "lindera-core"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "649777465f48147ce593ab6db347e235e3af8f693a23f4437be94a1cdbdf5fdf" checksum = "0c28191456debc98af6aa5f7db77872471983e9fa2a737b1c232b6ef543aed62"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@@ -3187,9 +3190,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-decompress" name = "lindera-decompress"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e3faaceb85e43ac250021866c6db3cdc9997b44b3d3ea498594d04edc91fc45" checksum = "4788a1ead2f63f3fc2888109272921dedd86a87b7d0bf05e9daab46600daac51"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"flate2", "flate2",
@@ -3198,9 +3201,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-dictionary" name = "lindera-dictionary"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31e15b2d2d8a4ad45f2e373a084931cf3dfbde15f124044e2436bb920af3366c" checksum = "bdf5f91725e32b9a21b1656baa7030766c9bafc4de4b4ddeb8ffdde7224dd2f6"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@@ -3223,9 +3226,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-dictionary-builder" name = "lindera-dictionary-builder"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59802949110545b59b663917ed3fd55dc3b3a8cde6bd20137d7fe24372cfb9aa" checksum = "e41f00ba7ac541b0ffd8c30e7a73f2dd197546cc5780462ec4f2e4782945a780"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@@ -3245,9 +3248,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-filter" name = "lindera-filter"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1320f118c3fc9e897f4ebfc16864e5ef8c0b06ba769c0a50e53f193f9d682bf8" checksum = "273d27e01e1377e2647314a4a5b9bdca4b52a867b319069ebae8c10191146eca"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"csv", "csv",
@@ -3270,9 +3273,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic" name = "lindera-ipadic"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b4731bf3730f1f38266d7ee9bca7d460cd336645c9dfd4e6a1082e58ab1e993" checksum = "b97a52ff0af5acb700093badaf7078051ab9ffd9071859724445a60193995f1f"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -3284,9 +3287,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic-builder" name = "lindera-ipadic-builder"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "309966c12e682f67205c3cd3c8dc55bbdcd1eb3b5c7c5cb41fb8acd18906d340" checksum = "bf5031c52686128db13f774b2c5a8abfd52b4cc1f904041d8411aa19d630ce4d"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"lindera-core", "lindera-core",
@@ -3296,9 +3299,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic-neologd" name = "lindera-ipadic-neologd"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e90e919b4cfb9962d24ee1e1d50a7c163bbf356376495ad66d1996e20b9f9e44" checksum = "d6b36764b27b169aa11d24888141f206a6c246a5b195c1e67127485bac512fb6"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -3310,9 +3313,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic-neologd-builder" name = "lindera-ipadic-neologd-builder"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e517df0d501f9f8bf3126da20fc8cb9a5e37921e0eec1824d7a62f096463e02" checksum = "abf36e40ace904741efdd883ed5c4dba6425f65156a0fb5d3f73a386335950dc"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"lindera-core", "lindera-core",
@@ -3322,9 +3325,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ko-dic" name = "lindera-ko-dic"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9c6da4e68bc8b452a54b96d65361ebdceb4b6f36ecf262425c0e1f77960ae82" checksum = "4c92a1a3564b531953f0238cbcea392f2905f7b27b449978cf9e702a80e1086d"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -3337,9 +3340,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ko-dic-builder" name = "lindera-ko-dic-builder"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afc95884cc8f6dfb176caf5991043a4acf94c359215bbd039ea765e00454f271" checksum = "9f2c60425abc1548570c2568858f74a1f042105ecd89faa39c651b4315350fd9"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"lindera-core", "lindera-core",
@@ -3349,9 +3352,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-tokenizer" name = "lindera-tokenizer"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d122042e1232a55c3604692445952a134e523822e9b4b9ab32a53ff890037ad4" checksum = "903e558981bcb6f59870aa7d6b4bcb09e8f7db778886a6a70f67fd74c9fa2ca3"
dependencies = [ dependencies = [
"bincode", "bincode",
"lindera-core", "lindera-core",
@@ -3363,9 +3366,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-unidic" name = "lindera-unidic"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbffae1fb2f2614abdcb50f99b138476dbac19862ffa57bfdc9c7b5d5b22a90c" checksum = "d227c3ce9cbd905f865c46c65a0470fd04e89b71104d7f92baa71a212ffe1d4b"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -3378,9 +3381,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-unidic-builder" name = "lindera-unidic-builder"
version = "0.32.3" version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe50055327712ebd1bcc74b657cf78c728a78b9586e3f99d5dd0b6a0be221c5d" checksum = "99e2c50015c242e02c451acb6748667ac6fd1d3d667cd7db48cd89e2f2d2377e"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"lindera-core", "lindera-core",
@@ -3465,9 +3468,9 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
[[package]] [[package]]
name = "lmdb-master-sys" name = "lmdb-master-sys"
version = "0.2.5" version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df" checksum = "472c3760e2a8d0f61f322fb36788021bb36d573c502b50fa3e2bcaac3ec326c9"
dependencies = [ dependencies = [
"cc", "cc",
"doxygen-rs", "doxygen-rs",
@@ -3510,18 +3513,9 @@ checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e"
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.26" version = "0.4.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
[[package]]
name = "lru"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "227748d55f2f0ab4735d87fd623798cb6b664512fe979705f829c9f81c934465"
dependencies = [
"hashbrown 0.15.2",
]
[[package]] [[package]]
name = "lzma-rs" name = "lzma-rs"
@@ -3533,17 +3527,6 @@ dependencies = [
"crc", "crc",
] ]
[[package]]
name = "lzma-sys"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]] [[package]]
name = "macro_rules_attribute" name = "macro_rules_attribute"
version = "0.2.0" version = "0.2.0"
@@ -3586,7 +3569,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]] [[package]]
name = "meili-snap" name = "meili-snap"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"insta", "insta",
"md5", "md5",
@@ -3595,7 +3578,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch" name = "meilisearch"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"actix-cors", "actix-cors",
"actix-http", "actix-http",
@@ -3682,12 +3665,12 @@ dependencies = [
"uuid", "uuid",
"wiremock", "wiremock",
"yaup", "yaup",
"zip 2.3.0", "zip 2.2.2",
] ]
[[package]] [[package]]
name = "meilisearch-auth" name = "meilisearch-auth"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"base64 0.22.1", "base64 0.22.1",
"enum-iterator", "enum-iterator",
@@ -3706,7 +3689,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-types" name = "meilisearch-types"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"actix-web", "actix-web",
"anyhow", "anyhow",
@@ -3740,9 +3723,10 @@ dependencies = [
[[package]] [[package]]
name = "meilitool" name = "meilitool"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)",
"clap", "clap",
"dump", "dump",
"file-store", "file-store",
@@ -3774,10 +3758,10 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"allocator-api2", "allocator-api2",
"arroy", "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bbqueue", "bbqueue",
"big_s", "big_s",
"bimap", "bimap",
@@ -3814,7 +3798,6 @@ dependencies = [
"json-depth-checker", "json-depth-checker",
"levenshtein_automata", "levenshtein_automata",
"liquid", "liquid",
"lru",
"maplit", "maplit",
"md5", "md5",
"meili-snap", "meili-snap",
@@ -3908,7 +3891,7 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [ dependencies = [
"libc", "libc",
"log", "log",
"wasi 0.11.0+wasi-snapshot-preview1", "wasi",
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
@@ -3919,7 +3902,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
dependencies = [ dependencies = [
"libc", "libc",
"wasi 0.11.0+wasi-snapshot-preview1", "wasi",
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
@@ -4146,9 +4129,9 @@ checksum = "ae4512a8f418ac322335255a72361b9ac927e106f4d7fe6ab4d8ac59cb01f7a9"
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.21.0" version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cde51589ab56b20a6f686b2c68f7a0bd6add753d697abf720d63f8db3ab7b1ad" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]] [[package]]
name = "onig" name = "onig"
@@ -4287,7 +4270,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]] [[package]]
name = "permissive-json-pointer" name = "permissive-json-pointer"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"big_s", "big_s",
"serde_json", "serde_json",
@@ -4535,7 +4518,7 @@ version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4"
dependencies = [ dependencies = [
"bitflags 2.9.0", "bitflags 2.6.0",
"hex", "hex",
"lazy_static", "lazy_static",
"procfs-core", "procfs-core",
@@ -4548,7 +4531,7 @@ version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29"
dependencies = [ dependencies = [
"bitflags 2.9.0", "bitflags 2.6.0",
"hex", "hex",
] ]
@@ -4696,7 +4679,7 @@ version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [ dependencies = [
"getrandom 0.2.15", "getrandom",
] ]
[[package]] [[package]]
@@ -4788,7 +4771,7 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b"
dependencies = [ dependencies = [
"getrandom 0.2.15", "getrandom",
"redox_syscall 0.2.16", "redox_syscall 0.2.16",
"thiserror 1.0.69", "thiserror 1.0.69",
] ]
@@ -4889,7 +4872,7 @@ version = "1.20.0"
source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4" source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"bitflags 2.9.0", "bitflags 2.6.0",
"instant", "instant",
"num-traits", "num-traits",
"once_cell", "once_cell",
@@ -4912,14 +4895,15 @@ dependencies = [
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.14" version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
dependencies = [ dependencies = [
"cc", "cc",
"cfg-if", "cfg-if",
"getrandom 0.2.15", "getrandom",
"libc", "libc",
"spin",
"untrusted", "untrusted",
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
@@ -5025,7 +5009,7 @@ version = "0.38.41"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
dependencies = [ dependencies = [
"bitflags 2.9.0", "bitflags 2.6.0",
"errno", "errno",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys",
@@ -5147,9 +5131,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.219" version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
@@ -5165,9 +5149,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.219" version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -5176,9 +5160,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.140" version = "1.0.138"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949"
dependencies = [ dependencies = [
"indexmap", "indexmap",
"itoa", "itoa",
@@ -5546,7 +5530,7 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
dependencies = [ dependencies = [
"bitflags 2.9.0", "bitflags 2.6.0",
"byteorder", "byteorder",
"enum-as-inner", "enum-as-inner",
"libc", "libc",
@@ -5602,7 +5586,7 @@ checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"fastrand", "fastrand",
"getrandom 0.2.15", "getrandom",
"once_cell", "once_cell",
"rustix", "rustix",
"windows-sys 0.52.0", "windows-sys 0.52.0",
@@ -5777,7 +5761,7 @@ dependencies = [
"aho-corasick", "aho-corasick",
"derive_builder 0.12.0", "derive_builder 0.12.0",
"esaxx-rs", "esaxx-rs",
"getrandom 0.2.15", "getrandom",
"itertools 0.12.1", "itertools 0.12.1",
"lazy_static", "lazy_static",
"log", "log",
@@ -6120,9 +6104,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]] [[package]]
name = "unicode-normalization" name = "unicode-normalization"
version = "0.1.24" version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5"
dependencies = [ dependencies = [
"tinyvec", "tinyvec",
] ]
@@ -6264,7 +6248,7 @@ version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a"
dependencies = [ dependencies = [
"getrandom 0.2.15", "getrandom",
"serde", "serde",
] ]
@@ -6360,35 +6344,25 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasi"
version = "0.13.3+wasi-0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
dependencies = [
"wit-bindgen-rt",
]
[[package]] [[package]]
name = "wasm-bindgen" name = "wasm-bindgen"
version = "0.2.100" version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro", "wasm-bindgen-macro",
] ]
[[package]] [[package]]
name = "wasm-bindgen-backend" name = "wasm-bindgen-backend"
version = "0.2.100" version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
dependencies = [ dependencies = [
"bumpalo", "bumpalo",
"log", "log",
"once_cell",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.87", "syn 2.0.87",
@@ -6409,9 +6383,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-macro" name = "wasm-bindgen-macro"
version = "0.2.100" version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
dependencies = [ dependencies = [
"quote", "quote",
"wasm-bindgen-macro-support", "wasm-bindgen-macro-support",
@@ -6419,9 +6393,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-macro-support" name = "wasm-bindgen-macro-support"
version = "0.2.100" version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -6432,12 +6406,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-shared" name = "wasm-bindgen-shared"
version = "0.2.100" version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
dependencies = [
"unicode-ident",
]
[[package]] [[package]]
name = "wasm-streams" name = "wasm-streams"
@@ -6842,15 +6813,6 @@ dependencies = [
"url", "url",
] ]
[[package]]
name = "wit-bindgen-rt"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
dependencies = [
"bitflags 2.9.0",
]
[[package]] [[package]]
name = "write16" name = "write16"
version = "1.0.0" version = "1.0.0"
@@ -6885,7 +6847,7 @@ dependencies = [
[[package]] [[package]]
name = "xtask" name = "xtask"
version = "1.14.0" version = "1.13.3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"build-info", "build-info",
@@ -6906,15 +6868,6 @@ dependencies = [
"uuid", "uuid",
] ]
[[package]]
name = "xz2"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
dependencies = [
"lzma-sys",
]
[[package]] [[package]]
name = "yada" name = "yada"
version = "0.5.1" version = "0.5.1"
@@ -7056,9 +7009,9 @@ dependencies = [
[[package]] [[package]]
name = "zip" name = "zip"
version = "2.3.0" version = "2.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84e9a772a54b54236b9b744aaaf8d7be01b4d6e99725523cb82cb32d1c81b1d7" checksum = "ae9c1ea7b3a5e1f4b922ff856a129881167511563dc219869afe3787fc0c1a45"
dependencies = [ dependencies = [
"aes", "aes",
"arbitrary", "arbitrary",
@@ -7069,16 +7022,15 @@ dependencies = [
"deflate64", "deflate64",
"displaydoc", "displaydoc",
"flate2", "flate2",
"getrandom 0.3.1",
"hmac", "hmac",
"indexmap", "indexmap",
"lzma-rs", "lzma-rs",
"memchr", "memchr",
"pbkdf2", "pbkdf2",
"rand",
"sha1", "sha1",
"thiserror 2.0.9", "thiserror 2.0.9",
"time", "time",
"xz2",
"zeroize", "zeroize",
"zopfli", "zopfli",
"zstd", "zstd",


@@ -22,7 +22,7 @@ members = [
 ]
 [workspace.package]
-version = "1.14.0"
+version = "1.13.3"
 authors = [
   "Quentin de Quelen <quentin@dequelen.me>",
   "Clément Renault <clement@meilisearch.com>",
@@ -36,12 +36,6 @@ license = "MIT"
 [profile.release]
 codegen-units = 1
-# We now compile heed without the NDEBUG define for better performance.
-# However, we still enable debug assertions for a better detection of
-# disk corruption on the cloud or in OSS.
-[profile.release.package.heed]
-debug-assertions = true
 [profile.dev.package.flate2]
 opt-level = 3


@@ -20,7 +20,7 @@
 <p align="center">
   <a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
   <a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
-  <a href="https://github.com/meilisearch/meilisearch/queue"><img alt="Merge Queues enabled" src="https://img.shields.io/badge/Merge_Queues-enabled-%2357cf60?logo=github"></a>
+  <a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
 </p>
 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

bors.toml (new file)

@@ -0,0 +1,10 @@
+status = [
+  'Tests on ubuntu-22.04',
+  'Tests on macos-13',
+  'Tests on windows-2022',
+  'Run Clippy',
+  'Run Rustfmt',
+  'Run tests in debug',
+]
+# 3 hours timeout
+timeout-sec = 10800


@@ -35,8 +35,7 @@ fn setup_dir(path: impl AsRef<Path>) {
 fn setup_index() -> Index {
     let path = "benches.mmdb";
     setup_dir(path);
-    let options = EnvOpenOptions::new();
-    let mut options = options.read_txn_without_tls();
+    let mut options = EnvOpenOptions::new();
     options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
     options.max_readers(100);
     Index::new(options, path, true).unwrap()
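
The same two-line-to-one-line `EnvOpenOptions` change repeats in the benchmark setup below, the fuzzer, and the index scheduler. As a minimal sketch of the two builder shapes being swapped (the two snippets target different heed releases and are shown together only for comparison; the function names are illustrative, and only calls that appear in the hunks themselves are used):

    use meilisearch_types::heed::EnvOpenOptions;

    // Removed side (left column): the builder is first switched to read
    // transactions without thread-local storage, then configured.
    fn configure_env_removed_side() {
        let options = EnvOpenOptions::new();
        let mut options = options.read_txn_without_tls();
        options.map_size(100 * 1024 * 1024 * 1024); // 100 GB, as in the benches
        options.max_readers(100);
        // ...then the environment is opened, as in the hunks above.
    }

    // Added side (right column): the plain builder is configured directly.
    fn configure_env_added_side() {
        let mut options = EnvOpenOptions::new();
        options.map_size(100 * 1024 * 1024 * 1024); // 100 GB, as in the benches
        options.max_readers(100);
        // ...then the environment is opened, as in the hunks above.
    }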


@@ -65,8 +65,7 @@ pub fn base_setup(conf: &Conf) -> Index {
     }
     create_dir_all(conf.database_name).unwrap();
-    let options = EnvOpenOptions::new();
-    let mut options = options.read_txn_without_tls();
+    let mut options = EnvOpenOptions::new();
     options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
     options.max_readers(100);
     let index = Index::new(options, conf.database_name, true).unwrap();


@@ -326,7 +326,6 @@ pub(crate) mod test {
         index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
         progress_trace: Default::default(),
         write_channel_congestion: None,
-        internal_database_sizes: Default::default(),
     },
     enqueued_at: Some(BatchEnqueuedAt {
         earliest: datetime!(2022-11-11 0:00 UTC),


@@ -57,8 +57,7 @@ fn main() {
     let opt = opt.clone();
     let handle = std::thread::spawn(move || {
-        let options = EnvOpenOptions::new();
-        let mut options = options.read_txn_without_tls();
+        let mut options = EnvOpenOptions::new();
         options.map_size(1024 * 1024 * 1024 * 1024);
         let tempdir = match opt.path {
             Some(path) => TempDir::new_in(path).unwrap(),


@@ -13,7 +13,6 @@ license.workspace = true
 [dependencies]
 anyhow = "1.0.95"
 bincode = "1.3.3"
-byte-unit = "5.1.6"
 bumpalo = "3.16.0"
 bumparaw-collections = "0.1.4"
 convert_case = "0.6.0"
@@ -23,7 +22,6 @@ dump = { path = "../dump" }
 enum-iterator = "2.1.0"
 file-store = { path = "../file-store" }
 flate2 = "1.0.35"
-indexmap = "2.7.0"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 memmap2 = "0.9.5"
@@ -46,6 +44,7 @@ ureq = "2.12.1"
 uuid = { version = "1.11.0", features = ["serde", "v4"] }
 [dev-dependencies]
+arroy = "0.5.0"
 big_s = "1.0.2"
 crossbeam-channel = "0.5.14"
 # fixed version due to format breakages in v1.40


@@ -2,7 +2,7 @@ use std::sync::{Arc, RwLock};
 use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
 use meilisearch_types::heed::types::{SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
+use meilisearch_types::heed::{Database, Env, RwTxn};
 use crate::error::FeatureNotEnabledError;
 use crate::Result;
@@ -118,19 +118,6 @@ impl RoFeatures {
             .into())
         }
     }
-    pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
-        if self.runtime.composite_embedders {
-            Ok(())
-        } else {
-            Err(FeatureNotEnabledError {
-                disabled_action,
-                feature: "composite embedders",
-                issue_link: "https://github.com/orgs/meilisearch/discussions/816",
-            }
-            .into())
-        }
-    }
 }
 impl FeatureData {
@@ -139,7 +126,7 @@ impl FeatureData {
     }
     pub fn new(
-        env: &Env<WithoutTls>,
+        env: &Env,
         wtxn: &mut RwTxn,
         instance_features: InstanceTogglableFeatures,
     ) -> Result<Self> {
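
For context on the removed `check_composite_embedders` gate: the other `RoFeatures` checks in this file follow the same shape, and a caller guards an experimental code path by propagating the `FeatureNotEnabledError`. A hypothetical call site (the function name and the `disabled_action` message are illustrative, not taken from the diff):

    // Hypothetical caller of the removed gate; `RoFeatures` and the crate's
    // `Result` come from the module shown above.
    fn configure_composite_embedder(features: &RoFeatures) -> Result<()> {
        // Errors with FeatureNotEnabledError unless the experimental flag is on.
        features.check_composite_embedders("setting a composite embedder")?;
        // ...continue with the experimental configuration...
        Ok(())
    }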


@@ -304,8 +304,7 @@ fn create_or_open_index(
     map_size: usize,
     creation: bool,
 ) -> Result<Index> {
-    let options = EnvOpenOptions::new();
-    let mut options = options.read_txn_without_tls();
+    let mut options = EnvOpenOptions::new();
     options.map_size(clamp_to_page_size(map_size));
     // You can find more details about this experimental
@@ -334,7 +333,7 @@ fn create_or_open_index(
 #[cfg(test)]
 mod tests {
-    use meilisearch_types::heed::{Env, WithoutTls};
+    use meilisearch_types::heed::Env;
     use meilisearch_types::Index;
     use uuid::Uuid;
@@ -344,7 +343,7 @@ mod tests {
     use crate::IndexScheduler;
     impl IndexMapper {
-        fn test() -> (Self, Env<WithoutTls>, IndexSchedulerHandle) {
+        fn test() -> (Self, Env, IndexSchedulerHandle) {
             let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
             (index_scheduler.index_mapper, index_scheduler.env, handle)
         }


@@ -4,7 +4,7 @@ use std::time::Duration;
 use std::{fs, thread};
 use meilisearch_types::heed::types::{SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
+use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
 use meilisearch_types::milli;
 use meilisearch_types::milli::database_stats::DatabaseStats;
 use meilisearch_types::milli::update::IndexerConfig;
@@ -164,7 +164,7 @@ impl IndexMapper {
     }
     pub fn new(
-        env: &Env<WithoutTls>,
+        env: &Env,
         wtxn: &mut RwTxn,
         options: &IndexSchedulerOptions,
         budget: IndexBudget,


@@ -344,7 +344,6 @@ pub fn snapshot_batch(batch: &Batch) -> String {
     let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
     let stats = BatchStats {
         progress_trace: Default::default(),
-        internal_database_sizes: Default::default(),
         write_channel_congestion: None,
         ..stats.clone()
     };


@@ -54,7 +54,7 @@ use meilisearch_types::batches::Batch;
 use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
 use meilisearch_types::heed::byteorder::BE;
 use meilisearch_types::heed::types::I128;
-use meilisearch_types::heed::{self, Env, RoTxn, WithoutTls};
+use meilisearch_types::heed::{self, Env, RoTxn};
 use meilisearch_types::milli::index::IndexEmbeddingConfig;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
@@ -125,17 +125,13 @@ pub struct IndexSchedulerOptions {
     pub instance_features: InstanceTogglableFeatures,
     /// The experimental features enabled for this instance.
     pub auto_upgrade: bool,
-    /// The maximal number of entries in the search query cache of an embedder.
-    ///
-    /// 0 disables the cache.
-    pub embedding_cache_cap: usize,
 }
 /// Structure which holds meilisearch's indexes and schedules the tasks
 /// to be performed on them.
 pub struct IndexScheduler {
     /// The LMDB environment which the DBs are associated with.
-    pub(crate) env: Env<WithoutTls>,
+    pub(crate) env: Env,
     /// The list of tasks currently processing
     pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>,
@@ -160,11 +156,6 @@ pub struct IndexScheduler {
     /// The Authorization header to send to the webhook URL.
     pub(crate) webhook_authorization_header: Option<String>,
-    /// A map to retrieve the runtime representation of an embedder depending on its configuration.
-    ///
-    /// This map may return the same embedder object for two different indexes or embedder settings,
-    /// but it will only do this if the embedder configuration options are the same, leading
-    /// to the same embeddings for the same input text.
     embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
     // ================= test
@@ -218,7 +209,6 @@ impl IndexScheduler {
     #[allow(private_interfaces)] // because test_utils is private
     pub fn new(
         options: IndexSchedulerOptions,
-        auth_env: Env<WithoutTls>,
         from_db_version: (u32, u32, u32),
         #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
         #[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
@@ -250,9 +240,7 @@ impl IndexScheduler {
         };
         let env = unsafe {
-            let env_options = heed::EnvOpenOptions::new();
-            let mut env_options = env_options.read_txn_without_tls();
-            env_options
+            heed::EnvOpenOptions::new()
                 .max_dbs(Self::nb_db())
                 .map_size(budget.task_db_size)
                 .open(&options.tasks_path)
@@ -272,7 +260,7 @@ impl IndexScheduler {
             processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
             version,
             queue,
-            scheduler: Scheduler::new(&options, auth_env),
+            scheduler: Scheduler::new(&options),
             index_mapper,
             env,
@@ -370,7 +358,7 @@ impl IndexScheduler {
         }
     }
-    pub fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
+    pub fn read_txn(&self) -> Result<RoTxn> {
         self.env.read_txn().map_err(|e| e.into())
     }
@@ -439,14 +427,12 @@ impl IndexScheduler {
     /// If you need to fetch information from or perform an action on all indexes,
     /// see the `try_for_each_index` function.
     pub fn index(&self, name: &str) -> Result<Index> {
-        let rtxn = self.env.read_txn()?;
-        self.index_mapper.index(&rtxn, name)
+        self.index_mapper.index(&self.env.read_txn()?, name)
     }
     /// Return the boolean referring if index exists.
     pub fn index_exists(&self, name: &str) -> Result<bool> {
-        let rtxn = self.env.read_txn()?;
-        self.index_mapper.index_exists(&rtxn, name)
+        self.index_mapper.index_exists(&self.env.read_txn()?, name)
     }
     /// Return the name of all indexes without opening them.
@@ -521,8 +507,7 @@ impl IndexScheduler {
     /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example.
     /// 3. The number of times the properties appeared.
     pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
-        let rtxn = self.read_txn()?;
-        self.queue.get_stats(&rtxn, &self.processing_tasks.read().unwrap())
+        self.queue.get_stats(&self.read_txn()?, &self.processing_tasks.read().unwrap())
     }
     // Return true if there is at least one task that is processing.
@@ -625,8 +610,8 @@ impl IndexScheduler {
task_id: Option<TaskId>, task_id: Option<TaskId>,
dry_run: bool, dry_run: bool,
) -> Result<Task> { ) -> Result<Task> {
// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty()) if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40 && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40
{ {
return Err(Error::NoSpaceLeftInTaskQueue); return Err(Error::NoSpaceLeftInTaskQueue);
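
Restated as a predicate, the guard above refuses new writes once the task database is more than 40% full, unless the incoming task can itself free space (a task deletion or cancelation targeting at least one task). A small illustrative helper; the function and its arguments are not part of the actual code, which keeps the check inline:

    /// Returns true when the enqueue must be refused.
    fn should_refuse_enqueue(can_free_space: bool, non_free_pages_size: u64, map_size: u64) -> bool {
        // Percentage of the LMDB map currently occupied by non-free pages.
        let used = (non_free_pages_size * 100) / map_size;
        !can_free_space && used > 40
    }

    // E.g. a TaskDeletion or TaskCancelation with a non-empty `tasks` bitmap
    // sets `can_free_space` to true and is always accepted.
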
@@ -827,7 +812,7 @@ impl IndexScheduler {
// add missing embedder // add missing embedder
let embedder = Arc::new( let embedder = Arc::new(
Embedder::new(embedder_options.clone(), self.scheduler.embedding_cache_cap) Embedder::new(embedder_options.clone())
.map_err(meilisearch_types::milli::vector::Error::from) .map_err(meilisearch_types::milli::vector::Error::from)
.map_err(|err| { .map_err(|err| {
Error::from_milli(err.into(), Some(index_uid.clone())) Error::from_milli(err.into(), Some(index_uid.clone()))
View File
@@ -64,13 +64,6 @@ make_enum_progress! {
} }
} }
make_enum_progress! {
pub enum FinalizingIndexStep {
Committing,
ComputingStats,
}
}
make_enum_progress! { make_enum_progress! {
pub enum TaskCancelationProgress { pub enum TaskCancelationProgress {
RetrievingTasks, RetrievingTasks,
View File
@@ -3,7 +3,7 @@ use std::ops::{Bound, RangeBounds};
use meilisearch_types::batches::{Batch, BatchId}; use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status}; use meilisearch_types::tasks::{Kind, Status};
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
@@ -66,7 +66,7 @@ impl BatchQueue {
NUMBER_OF_DATABASES NUMBER_OF_DATABASES
} }
pub(super) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> { pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
Ok(Self { Ok(Self {
all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?, all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?, status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,
View File
@@ -13,7 +13,7 @@ use std::time::Duration;
use file_store::FileStore; use file_store::FileStore;
use meilisearch_types::batches::BatchId; use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@@ -157,7 +157,7 @@ impl Queue {
/// Create an index scheduler and start its run loop. /// Create an index scheduler and start its run loop.
pub(crate) fn new( pub(crate) fn new(
env: &Env<WithoutTls>, env: &Env,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
options: &IndexSchedulerOptions, options: &IndexSchedulerOptions,
) -> Result<Self> { ) -> Result<Self> {
@@ -292,6 +292,8 @@ impl Queue {
return Ok(task); return Ok(task);
} }
// Get rid of the mutability.
let task = task;
self.tasks.register(wtxn, &task)?; self.tasks.register(wtxn, &task)?;
Ok(task) Ok(task)
View File
@@ -1,7 +1,7 @@
use std::ops::{Bound, RangeBounds}; use std::ops::{Bound, RangeBounds};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status, Task}; use meilisearch_types::tasks::{Kind, Status, Task};
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
@@ -68,7 +68,7 @@ impl TaskQueue {
NUMBER_OF_DATABASES NUMBER_OF_DATABASES
} }
pub(crate) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> { pub(crate) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
Ok(Self { Ok(Self {
all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?, all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?,
status: env.create_database(wtxn, Some(db_name::STATUS))?, status: env.create_database(wtxn, Some(db_name::STATUS))?,
View File
@@ -364,7 +364,7 @@ fn test_task_queue_is_full() {
// we won't be able to test this error in an integration test, thus as a best-effort test I still ensure the error returns the expected error code // we won't be able to test this error in an integration test, thus as a best-effort test I still ensure the error returns the expected error code
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
// Even the task deletion and cancelation that don't delete anything should be refused // Even the task deletion that doesn't delete anything shouldn't be accepted
let result = index_scheduler let result = index_scheduler
.register( .register(
KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() },
@@ -373,39 +373,10 @@ fn test_task_queue_is_full() {
) )
.unwrap_err(); .unwrap_err();
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
let result = index_scheduler
.register(
KindWithContent::TaskCancelation { query: S("test"), tasks: RoaringBitmap::new() },
None,
false,
)
.unwrap_err();
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
// we won't be able to test this error in an integration test, thus as a best-effort test I still ensure the error returns the expected error code // we won't be able to test this error in an integration test, thus as a best-effort test I still ensure the error returns the expected error code
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
// But a task cancelation that cancels something should work // But a task deletion that deletes something should work
index_scheduler
.register(
KindWithContent::TaskCancelation { query: S("test"), tasks: (0..100).collect() },
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
// But we should still be forbidden from enqueuing new tasks
let result = index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap_err();
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
// And a task deletion that deletes something should work
index_scheduler index_scheduler
.register( .register(
KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() },
View File
@@ -20,12 +20,9 @@ use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::Arc; use std::sync::Arc;
use convert_case::{Case, Casing as _};
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli; use meilisearch_types::milli;
use meilisearch_types::tasks::Status; use meilisearch_types::tasks::Status;
use process_batch::ProcessBatchInfo;
use rayon::current_num_threads; use rayon::current_num_threads;
use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon::iter::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@@ -74,15 +71,10 @@ pub struct Scheduler {
pub(crate) snapshots_path: PathBuf, pub(crate) snapshots_path: PathBuf,
/// The path to the folder containing the auth LMDB env. /// The path to the folder containing the auth LMDB env.
pub(crate) auth_env: Env<WithoutTls>, pub(crate) auth_path: PathBuf,
/// The path to the version file of Meilisearch. /// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf, pub(crate) version_file_path: PathBuf,
/// The maximal number of entries in the search query cache of an embedder.
///
/// 0 disables the cache.
pub(crate) embedding_cache_cap: usize,
} }
impl Scheduler { impl Scheduler {
@@ -95,13 +87,12 @@ impl Scheduler {
batched_tasks_size_limit: self.batched_tasks_size_limit, batched_tasks_size_limit: self.batched_tasks_size_limit,
dumps_path: self.dumps_path.clone(), dumps_path: self.dumps_path.clone(),
snapshots_path: self.snapshots_path.clone(), snapshots_path: self.snapshots_path.clone(),
auth_env: self.auth_env.clone(), auth_path: self.auth_path.clone(),
version_file_path: self.version_file_path.clone(), version_file_path: self.version_file_path.clone(),
embedding_cache_cap: self.embedding_cache_cap,
} }
} }
pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler { pub fn new(options: &IndexSchedulerOptions) -> Scheduler {
Scheduler { Scheduler {
must_stop_processing: MustStopProcessing::default(), must_stop_processing: MustStopProcessing::default(),
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
@@ -111,9 +102,8 @@ impl Scheduler {
batched_tasks_size_limit: options.batched_tasks_size_limit, batched_tasks_size_limit: options.batched_tasks_size_limit,
dumps_path: options.dumps_path.clone(), dumps_path: options.dumps_path.clone(),
snapshots_path: options.snapshots_path.clone(), snapshots_path: options.snapshots_path.clone(),
auth_env, auth_path: options.auth_path.clone(),
version_file_path: options.version_file_path.clone(), version_file_path: options.version_file_path.clone(),
embedding_cache_cap: options.embedding_cache_cap,
} }
} }
} }
@@ -225,16 +215,16 @@ impl IndexScheduler {
let mut stop_scheduler_forever = false; let mut stop_scheduler_forever = false;
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new(); let mut canceled = RoaringBitmap::new();
let mut process_batch_info = ProcessBatchInfo::default(); let mut congestion = None;
match res { match res {
Ok((tasks, info)) => { Ok((tasks, cong)) => {
#[cfg(test)] #[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded); self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32); let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
progress.update_progress(task_progress_obj); progress.update_progress(task_progress_obj);
process_batch_info = info; congestion = cong;
let mut success = 0; let mut success = 0;
let mut failure = 0; let mut failure = 0;
let mut canceled_by = None; let mut canceled_by = None;
@@ -352,9 +342,6 @@ impl IndexScheduler {
// We must re-add the canceled task so they're part of the same batch. // We must re-add the canceled task so they're part of the same batch.
ids |= canceled; ids |= canceled;
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
process_batch_info;
processing_batch.stats.progress_trace = processing_batch.stats.progress_trace =
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect(); progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
processing_batch.stats.write_channel_congestion = congestion.map(|congestion| { processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
@@ -364,33 +351,6 @@ impl IndexScheduler {
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into()); congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
congestion_info congestion_info
}); });
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
.iter()
.flat_map(|(dbname, pre_size)| {
post_commit_dabases_sizes
.get(dbname)
.map(|post_size| {
use byte_unit::{Byte, UnitType::Binary};
use std::cmp::Ordering::{Equal, Greater, Less};
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
let diff_size = post_size.abs_diff(*pre_size) as u64;
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
let sign = match post_size.cmp(pre_size) {
Equal => return None,
Greater => "+",
Less => "-",
};
Some((
dbname.to_case(Case::Camel),
format!("{post:#.2} ({sign}{diff:#.2})").into(),
))
})
.into_iter()
.flatten()
})
.collect();
if let Some(congestion) = congestion { if let Some(congestion) = congestion {
tracing::debug!( tracing::debug!(
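
The internal_database_sizes block above turns the pre- and post-commit database sizes into human-readable deltas. The same formatting logic, reduced to a single illustrative helper (the function name is made up; byte_unit is the crate already used in the block):

    use std::cmp::Ordering::{Equal, Greater, Less};
    use byte_unit::{Byte, UnitType::Binary};

    /// Format "post (+diff)" or "post (-diff)" for one database,
    /// or None when the size did not change.
    fn format_size_delta(pre_size: usize, post_size: usize) -> Option<String> {
        let sign = match post_size.cmp(&pre_size) {
            Equal => return None,
            Greater => "+",
            Less => "-",
        };
        let post = Byte::from_u64(post_size as u64).get_appropriate_unit(Binary);
        let diff = Byte::from_u64(post_size.abs_diff(pre_size) as u64).get_appropriate_unit(Binary);
        Some(format!("{post:#.2} ({sign}{diff:#.2})"))
    }
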
View File
@@ -12,7 +12,7 @@ use roaring::RoaringBitmap;
use super::create_batch::Batch; use super::create_batch::Batch;
use crate::processing::{ use crate::processing::{
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep, AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress,
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
UpdateIndexProgress, UpdateIndexProgress,
}; };
@@ -22,16 +22,6 @@ use crate::utils::{
}; };
use crate::{Error, IndexScheduler, Result, TaskId}; use crate::{Error, IndexScheduler, Result, TaskId};
#[derive(Debug, Default)]
pub struct ProcessBatchInfo {
/// The write channel congestion. None when unavailable: settings update.
pub congestion: Option<ChannelCongestion>,
/// The sizes of the different databases before starting the indexation.
pub pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
/// The sizes of the different databases after committing the indexation.
pub post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
}
impl IndexScheduler { impl IndexScheduler {
/// Apply the operation associated with the given batch. /// Apply the operation associated with the given batch.
/// ///
@@ -45,7 +35,7 @@ impl IndexScheduler {
batch: Batch, batch: Batch,
current_batch: &mut ProcessingBatch, current_batch: &mut ProcessingBatch,
progress: Progress, progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> { ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
#[cfg(test)] #[cfg(test)]
{ {
self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?; self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
@@ -86,7 +76,7 @@ impl IndexScheduler {
canceled_tasks.push(task); canceled_tasks.push(task);
Ok((canceled_tasks, ProcessBatchInfo::default())) Ok((canceled_tasks, None))
} }
Batch::TaskDeletions(mut tasks) => { Batch::TaskDeletions(mut tasks) => {
// 1. Retrieve the tasks that matched the query at enqueue-time. // 1. Retrieve the tasks that matched the query at enqueue-time.
@@ -125,14 +115,14 @@ impl IndexScheduler {
_ => unreachable!(), _ => unreachable!(),
} }
} }
Ok((tasks, ProcessBatchInfo::default())) Ok((tasks, None))
}
Batch::SnapshotCreation(tasks) => {
self.process_snapshot(progress, tasks).map(|tasks| (tasks, None))
}
Batch::Dump(task) => {
self.process_dump_creation(progress, task).map(|tasks| (tasks, None))
} }
Batch::SnapshotCreation(tasks) => self
.process_snapshot(progress, tasks)
.map(|tasks| (tasks, ProcessBatchInfo::default())),
Batch::Dump(task) => self
.process_dump_creation(progress, task)
.map(|tasks| (tasks, ProcessBatchInfo::default())),
Batch::IndexOperation { op, must_create_index } => { Batch::IndexOperation { op, must_create_index } => {
let index_uid = op.index_uid().to_string(); let index_uid = op.index_uid().to_string();
let index = if must_create_index { let index = if must_create_index {
@@ -149,12 +139,10 @@ impl IndexScheduler {
.set_currently_updating_index(Some((index_uid.clone(), index.clone()))); .set_currently_updating_index(Some((index_uid.clone(), index.clone())));
let mut index_wtxn = index.write_txn()?; let mut index_wtxn = index.write_txn()?;
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
let (tasks, congestion) = let (tasks, congestion) =
self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?; self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
{ {
progress.update_progress(FinalizingIndexStep::Committing);
let span = tracing::trace_span!(target: "indexing::scheduler", "commit"); let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
let _entered = span.enter(); let _entered = span.enter();
@@ -165,15 +153,12 @@ impl IndexScheduler {
// stats of the index. Since the tasks have already been processed and // stats of the index. Since the tasks have already been processed and
// this is a non-critical operation. If it fails, we should not fail // this is a non-critical operation. If it fails, we should not fail
// the entire batch. // the entire batch.
let mut post_commit_dabases_sizes = None;
let res = || -> Result<()> { let res = || -> Result<()> {
progress.update_progress(FinalizingIndexStep::ComputingStats);
let index_rtxn = index.read_txn()?; let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
let mut wtxn = self.env.write_txn()?; let mut wtxn = self.env.write_txn()?;
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
post_commit_dabases_sizes = Some(index.database_sizes(&index_rtxn)?);
wtxn.commit()?; wtxn.commit()?;
Ok(()) Ok(())
}(); }();
@@ -186,16 +171,7 @@ impl IndexScheduler {
), ),
} }
let info = ProcessBatchInfo { Ok((tasks, congestion))
congestion,
// In case we fail to get the post-commit sizes we decide
// that nothing changed and use the pre-commit sizes.
post_commit_dabases_sizes: post_commit_dabases_sizes
.unwrap_or_else(|| pre_commit_dabases_sizes.clone()),
pre_commit_dabases_sizes,
};
Ok((tasks, info))
} }
Batch::IndexCreation { index_uid, primary_key, task } => { Batch::IndexCreation { index_uid, primary_key, task } => {
progress.update_progress(CreateIndexProgress::CreatingTheIndex); progress.update_progress(CreateIndexProgress::CreatingTheIndex);
@@ -263,7 +239,7 @@ impl IndexScheduler {
), ),
} }
Ok((vec![task], ProcessBatchInfo::default())) Ok((vec![task], None))
} }
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => { Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
progress.update_progress(DeleteIndexProgress::DeletingTheIndex); progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
@@ -297,9 +273,7 @@ impl IndexScheduler {
}; };
} }
// Here we could also show that all the internal database sizes go to 0 Ok((tasks, None))
// but it would mean opening the index and that's costly.
Ok((tasks, ProcessBatchInfo::default()))
} }
Batch::IndexSwap { mut task } => { Batch::IndexSwap { mut task } => {
progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap); progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
@@ -347,7 +321,7 @@ impl IndexScheduler {
} }
wtxn.commit()?; wtxn.commit()?;
task.status = Status::Succeeded; task.status = Status::Succeeded;
Ok((vec![task], ProcessBatchInfo::default())) Ok((vec![task], None))
} }
Batch::UpgradeDatabase { mut tasks } => { Batch::UpgradeDatabase { mut tasks } => {
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else { let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
@@ -377,7 +351,7 @@ impl IndexScheduler {
task.error = None; task.error = None;
} }
Ok((tasks, ProcessBatchInfo::default())) Ok((tasks, None))
} }
} }
} }
View File
@@ -32,7 +32,7 @@ impl IndexScheduler {
index_wtxn: &mut RwTxn<'i>, index_wtxn: &mut RwTxn<'i>,
index: &'i Index, index: &'i Index,
operation: IndexOperation, operation: IndexOperation,
progress: &Progress, progress: Progress,
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> { ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let started_processing_at = std::time::Instant::now(); let started_processing_at = std::time::Instant::now();
@@ -186,7 +186,7 @@ impl IndexScheduler {
&document_changes, &document_changes,
embedders, embedders,
&|| must_stop_processing.get(), &|| must_stop_processing.get(),
progress, &progress,
) )
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?, .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
); );
@@ -307,7 +307,7 @@ impl IndexScheduler {
&document_changes, &document_changes,
embedders, embedders,
&|| must_stop_processing.get(), &|| must_stop_processing.get(),
progress, &progress,
) )
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
); );
@@ -465,7 +465,7 @@ impl IndexScheduler {
&document_changes, &document_changes,
embedders, embedders,
&|| must_stop_processing.get(), &|| must_stop_processing.get(),
progress, &progress,
) )
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
); );
@@ -520,7 +520,7 @@ impl IndexScheduler {
index_uid: index_uid.clone(), index_uid: index_uid.clone(),
tasks: cleared_tasks, tasks: cleared_tasks,
}, },
progress, progress.clone(),
)?; )?;
let (settings_tasks, _congestion) = self.apply_index_operation( let (settings_tasks, _congestion) = self.apply_index_operation(
View File
@@ -4,6 +4,7 @@ use std::sync::atomic::Ordering;
use meilisearch_types::heed::CompactionOption; use meilisearch_types::heed::CompactionOption;
use meilisearch_types::milli::progress::{Progress, VariableNameStep}; use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::{self};
use meilisearch_types::tasks::{Status, Task}; use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::{compression, VERSION_FILE_NAME}; use meilisearch_types::{compression, VERSION_FILE_NAME};
@@ -27,7 +28,7 @@ impl IndexScheduler {
// 2. Snapshot the index-scheduler LMDB env // 2. Snapshot the index-scheduler LMDB env
// //
// When we call copy_to_path, LMDB opens a read transaction by itself, // When we call copy_to_file, LMDB opens a read transaction by itself,
// we can't provide our own. It is an issue as we would like to know // we can't provide our own. It is an issue as we would like to know
// the update files to copy but new ones can be enqueued between the copy // the update files to copy but new ones can be enqueued between the copy
// of the env and the new transaction we open to retrieve the enqueued tasks. // of the env and the new transaction we open to retrieve the enqueued tasks.
@@ -41,7 +42,7 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
let dst = temp_snapshot_dir.path().join("tasks"); let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 2.2 Create a read transaction on the index-scheduler // 2.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
@@ -80,7 +81,7 @@ impl IndexScheduler {
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
index index
.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled) .copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?; .map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
} }
@@ -90,7 +91,14 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
let dst = temp_snapshot_dir.path().join("auth"); let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; // TODO We can't use the open_auth_store_env function here but we should
let auth = unsafe {
milli::heed::EnvOpenOptions::new()
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.scheduler.auth_path)
}?;
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot // 5. Copy and tarball the flat snapshot
progress.update_progress(SnapshotCreationProgress::CreateTheTarball); progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
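
Both sides of this compare snapshot every LMDB environment the same way: create the destination directory, then let LMDB compact the environment into it (spelled copy_to_path on one side and copy_to_file on the other). A minimal illustrative helper; the function itself is not in the code, which inlines this per environment:

    use std::fs;
    use std::path::Path;
    use meilisearch_types::heed::{CompactionOption, Env, WithoutTls};

    /// Compact one environment into `<dst>/data.mdb` for the snapshot.
    fn snapshot_env(env: &Env<WithoutTls>, dst: &Path) -> anyhow::Result<()> {
        fs::create_dir_all(dst)?;
        // LMDB opens its own read transaction for the copy, which is why the
        // scheduler re-reads the enqueued tasks under a fresh transaction afterwards.
        env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
        Ok(())
    }
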
View File
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,] [timestamp] [4,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, } 1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, } 2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, } 3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
View File
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
enqueued [0,] enqueued [0,]
View File
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
View File
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,] [timestamp] [0,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batch to tasks mapping: ### Batch to tasks mapping:
0 [0,] 0 [0,]
View File
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,] [timestamp] [0,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batch to tasks mapping: ### Batch to tasks mapping:
0 [0,] 0 [0,]
View File
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,] [timestamp] [0,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batch to tasks mapping: ### Batch to tasks mapping:
0 [0,] 0 [0,]
View File
@@ -104,9 +104,10 @@ fn import_vectors() {
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap(); let beagle_embed =
let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap(); hf_embedder.embed_search(S("Intel the beagle best doggo"), None).unwrap();
let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap(); let lab_embed = hf_embedder.embed_search(S("Max the lab best doggo"), None).unwrap();
let patou_embed = hf_embedder.embed_search(S("kefir the patou best doggo"), None).unwrap();
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
}; };
View File
@@ -5,7 +5,6 @@ use std::time::Duration;
use big_s::S; use big_s::S;
use crossbeam_channel::RecvTimeoutError; use crossbeam_channel::RecvTimeoutError;
use file_store::File; use file_store::File;
use meilisearch_auth::open_auth_store_env;
use meilisearch_types::document_formats::DocumentFormatError; use meilisearch_types::document_formats::DocumentFormatError;
use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments; use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
@@ -112,7 +111,6 @@ impl IndexScheduler {
batched_tasks_size_limit: u64::MAX, batched_tasks_size_limit: u64::MAX,
instance_features: Default::default(), instance_features: Default::default(),
auto_upgrade: true, // Don't cost much and will ensure the happy path works auto_upgrade: true, // Don't cost much and will ensure the happy path works
embedding_cache_cap: 10,
}; };
let version = configuration(&mut options).unwrap_or_else(|| { let version = configuration(&mut options).unwrap_or_else(|| {
( (
@@ -122,10 +120,7 @@ impl IndexScheduler {
) )
}); });
std::fs::create_dir_all(&options.auth_path).unwrap(); let index_scheduler = Self::new(options, version, sender, planned_failures).unwrap();
let auth_env = open_auth_store_env(&options.auth_path).unwrap();
let index_scheduler =
Self::new(options, auth_env, version, sender, planned_failures).unwrap();
// To be 100% consistent between all test we're going to start the scheduler right now // To be 100% consistent between all test we're going to start the scheduler right now
// and ensure it's in the expected starting state. // and ensure it's in the expected starting state.
View File
@@ -1,5 +1,5 @@
use anyhow::bail; use anyhow::bail;
use meilisearch_types::heed::{Env, RwTxn, WithoutTls}; use meilisearch_types::heed::{Env, RwTxn};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use time::OffsetDateTime; use time::OffsetDateTime;
@@ -9,17 +9,13 @@ use crate::queue::TaskQueue;
use crate::versioning::Versioning; use crate::versioning::Versioning;
trait UpgradeIndexScheduler { trait UpgradeIndexScheduler {
fn upgrade( fn upgrade(&self, env: &Env, wtxn: &mut RwTxn, original: (u32, u32, u32))
&self, -> anyhow::Result<()>;
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
original: (u32, u32, u32),
) -> anyhow::Result<()>;
fn target_version(&self) -> (u32, u32, u32); fn target_version(&self) -> (u32, u32, u32);
} }
pub fn upgrade_index_scheduler( pub fn upgrade_index_scheduler(
env: &Env<WithoutTls>, env: &Env,
versioning: &Versioning, versioning: &Versioning,
from: (u32, u32, u32), from: (u32, u32, u32),
to: (u32, u32, u32), to: (u32, u32, u32),
@@ -33,7 +29,6 @@ pub fn upgrade_index_scheduler(
let start = match from { let start = match from {
(1, 12, _) => 0, (1, 12, _) => 0,
(1, 13, _) => 0, (1, 13, _) => 0,
(1, 14, _) => 0,
(major, minor, patch) => { (major, minor, patch) => {
if major > current_major if major > current_major
|| (major == current_major && minor > current_minor) || (major == current_major && minor > current_minor)
@@ -96,7 +91,7 @@ struct ToCurrentNoOp {}
impl UpgradeIndexScheduler for ToCurrentNoOp { impl UpgradeIndexScheduler for ToCurrentNoOp {
fn upgrade( fn upgrade(
&self, &self,
_env: &Env<WithoutTls>, _env: &Env,
_wtxn: &mut RwTxn, _wtxn: &mut RwTxn,
_original: (u32, u32, u32), _original: (u32, u32, u32),
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
View File
@@ -1,5 +1,5 @@
use meilisearch_types::heed::types::Str; use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn, WithoutTls}; use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::heed_codec::version::VersionCodec; use meilisearch_types::milli::heed_codec::version::VersionCodec;
use meilisearch_types::versioning; use meilisearch_types::versioning;
@@ -46,12 +46,12 @@ impl Versioning {
} }
/// Return `Self` without checking anything about the version /// Return `Self` without checking anything about the version
pub fn raw_new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self, heed::Error> { pub fn raw_new(env: &Env, wtxn: &mut RwTxn) -> Result<Self, heed::Error> {
let version = env.create_database(wtxn, Some(db_name::VERSION))?; let version = env.create_database(wtxn, Some(db_name::VERSION))?;
Ok(Self { version }) Ok(Self { version })
} }
pub(crate) fn new(env: &Env<WithoutTls>, db_version: (u32, u32, u32)) -> Result<Self> { pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> {
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let this = Self::raw_new(env, &mut wtxn)?; let this = Self::raw_new(env, &mut wtxn)?;
let from = match this.get_version(&wtxn)? { let from = match this.get_version(&wtxn)? {

View File
use std::io::{BufReader, Write}; use std::io::{BufReader, Write};
use std::path::Path; use std::path::Path;
use meilisearch_types::heed::{Env, WithoutTls};
use serde_json::Deserializer; use serde_json::Deserializer;
use crate::{AuthController, HeedAuthStore, Result}; use crate::{AuthController, HeedAuthStore, Result};
@@ -10,8 +9,11 @@ use crate::{AuthController, HeedAuthStore, Result};
const KEYS_PATH: &str = "keys"; const KEYS_PATH: &str = "keys";
impl AuthController { impl AuthController {
pub fn dump(auth_env: Env<WithoutTls>, dst: impl AsRef<Path>) -> Result<()> { pub fn dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
let store = HeedAuthStore::new(auth_env)?; let mut store = HeedAuthStore::new(&src)?;
// do not attempt to close the database on drop!
store.set_drop_on_close(false);
let keys_file_path = dst.as_ref().join(KEYS_PATH); let keys_file_path = dst.as_ref().join(KEYS_PATH);
@@ -25,8 +27,8 @@ impl AuthController {
Ok(()) Ok(())
} }
pub fn load_dump(src: impl AsRef<Path>, auth_env: Env<WithoutTls>) -> Result<()> { pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
let store = HeedAuthStore::new(auth_env)?; let store = HeedAuthStore::new(&dst)?;
let keys_file_path = src.as_ref().join(KEYS_PATH); let keys_file_path = src.as_ref().join(KEYS_PATH);

View File
mod store; mod store;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::sync::Arc;
use error::{AuthControllerError, Result}; use error::{AuthControllerError, Result};
use maplit::hashset; use maplit::hashset;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey}; use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey};
use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::update::Setting;
@@ -18,19 +19,19 @@ use uuid::Uuid;
#[derive(Clone)] #[derive(Clone)]
pub struct AuthController { pub struct AuthController {
store: HeedAuthStore, store: Arc<HeedAuthStore>,
master_key: Option<String>, master_key: Option<String>,
} }
impl AuthController { impl AuthController {
pub fn new(auth_env: Env<WithoutTls>, master_key: &Option<String>) -> Result<Self> { pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> {
let store = HeedAuthStore::new(auth_env)?; let store = HeedAuthStore::new(db_path)?;
if store.is_empty()? { if store.is_empty()? {
generate_default_keys(&store)?; generate_default_keys(&store)?;
} }
Ok(Self { store, master_key: master_key.clone() }) Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
} }
/// Return `Ok(())` if the auth controller is able to access one of its database. /// Return `Ok(())` if the auth controller is able to access one of its database.

View File
use std::borrow::Cow; use std::borrow::Cow;
use std::cmp::Reverse; use std::cmp::Reverse;
use std::collections::HashSet; use std::collections::HashSet;
use std::fs::create_dir_all;
use std::path::Path; use std::path::Path;
use std::result::Result as StdResult; use std::result::Result as StdResult;
use std::str; use std::str;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc;
use hmac::{Hmac, Mac}; use hmac::{Hmac, Mac};
use meilisearch_types::heed::{BoxedError, WithoutTls}; use meilisearch_types::heed::BoxedError;
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId; use meilisearch_types::keys::KeyId;
use meilisearch_types::milli::heed; use meilisearch_types::milli;
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson}; use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use sha2::Sha256; use sha2::Sha256;
@@ -23,32 +25,44 @@ use super::error::{AuthControllerError, Result};
use super::{Action, Key}; use super::{Action, Key};
const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB
const AUTH_DB_PATH: &str = "auth";
const KEY_DB_NAME: &str = "api-keys"; const KEY_DB_NAME: &str = "api-keys";
const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration"; const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration";
#[derive(Clone)] #[derive(Clone)]
pub struct HeedAuthStore { pub struct HeedAuthStore {
env: Env<WithoutTls>, env: Arc<Env>,
keys: Database<Bytes, SerdeJson<Key>>, keys: Database<Bytes, SerdeJson<Key>>,
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>, action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
should_close_on_drop: bool,
} }
pub fn open_auth_store_env(path: &Path) -> heed::Result<Env<WithoutTls>> { impl Drop for HeedAuthStore {
let options = EnvOpenOptions::new(); fn drop(&mut self) {
let mut options = options.read_txn_without_tls(); if self.should_close_on_drop && Arc::strong_count(&self.env) == 1 {
self.env.as_ref().clone().prepare_for_closing();
}
}
}
pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env> {
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2); options.max_dbs(2);
unsafe { options.open(path) } unsafe { options.open(path) }
} }
impl HeedAuthStore { impl HeedAuthStore {
pub fn new(env: Env<WithoutTls>) -> Result<Self> { pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().join(AUTH_DB_PATH);
create_dir_all(&path)?;
let env = Arc::new(open_auth_store_env(path.as_ref())?);
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?; let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?;
let action_keyid_index_expiration = let action_keyid_index_expiration =
env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
wtxn.commit()?; wtxn.commit()?;
Ok(Self { env, keys, action_keyid_index_expiration }) Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
} }
/// Return `Ok(())` if the auth store is able to access one of its database. /// Return `Ok(())` if the auth store is able to access one of its database.
@@ -68,6 +82,10 @@ impl HeedAuthStore {
Ok(self.env.non_free_pages_size()?) Ok(self.env.non_free_pages_size()?)
} }
pub fn set_drop_on_close(&mut self, v: bool) {
self.should_close_on_drop = v;
}
pub fn is_empty(&self) -> Result<bool> { pub fn is_empty(&self) -> Result<bool> {
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
@@ -275,7 +293,7 @@ impl HeedAuthStore {
/// optionally on a specific index, for a given key. /// optionally on a specific index, for a given key.
pub struct KeyIdActionCodec; pub struct KeyIdActionCodec;
impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec { impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
type DItem = (KeyId, Action, Option<&'a [u8]>); type DItem = (KeyId, Action, Option<&'a [u8]>);
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> { fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
@@ -292,7 +310,7 @@ impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
} }
} }
impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec { impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>); type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> { fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
View File
@@ -64,6 +64,4 @@ pub struct BatchStats {
pub progress_trace: serde_json::Map<String, serde_json::Value>, pub progress_trace: serde_json::Map<String, serde_json::Value>,
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>, pub write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>,
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub internal_database_sizes: serde_json::Map<String, serde_json::Value>,
} }
View File
@@ -241,7 +241,6 @@ InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ; InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ; InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ; InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentIds , InvalidRequest , BAD_REQUEST ;
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchEmbedder , InvalidRequest , BAD_REQUEST ; InvalidSearchEmbedder , InvalidRequest , BAD_REQUEST ;
@@ -282,7 +281,6 @@ InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ; InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ; InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
InvalidSearchLocales , InvalidRequest , BAD_REQUEST ; InvalidSearchLocales , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchExhaustiveFacetCount, InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSimilarId , InvalidRequest , BAD_REQUEST ; InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ; InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
@@ -407,7 +405,7 @@ impl ErrorCode for milli::Error {
match error { match error {
// TODO: wait for spec for new error codes. // TODO: wait for spec for new error codes.
UserError::SerdeJson(_) UserError::SerdeJson(_)
| UserError::EnvAlreadyOpened | UserError::InvalidLmdbOpenOptions
| UserError::DocumentLimitReached | UserError::DocumentLimitReached
| UserError::UnknownInternalDocumentId { .. } => Code::Internal, | UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStoreFile, UserError::InvalidStoreFile => Code::InvalidStoreFile,
@@ -454,10 +452,7 @@ impl ErrorCode for milli::Error {
} }
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
| UserError::InvalidIndexingVectorDimensions { .. } => {
Code::InvalidVectorDimensions
}
UserError::InvalidVectorsMapType { .. } UserError::InvalidVectorsMapType { .. }
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType, | UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors, UserError::TooManyVectors(_, _) => Code::TooManyVectors,
@@ -507,7 +502,8 @@ impl ErrorCode for HeedError {
HeedError::Mdb(_) HeedError::Mdb(_)
| HeedError::Encoding(_) | HeedError::Encoding(_)
| HeedError::Decoding(_) | HeedError::Decoding(_)
| HeedError::EnvAlreadyOpened => Code::Internal, | HeedError::DatabaseClosing
| HeedError::BadOpenOptions { .. } => Code::Internal,
} }
} }
} }

View File

@@ -11,7 +11,6 @@ pub struct RuntimeTogglableFeatures {
pub contains_filter: bool, pub contains_filter: bool,
pub network: bool, pub network: bool,
pub get_task_documents_route: bool, pub get_task_documents_route: bool,
pub composite_embedders: bool,
} }
#[derive(Default, Debug, Clone, Copy)] #[derive(Default, Debug, Clone, Copy)]

View File

@@ -30,7 +30,11 @@ actix-web = { version = "4.9.0", default-features = false, features = [
anyhow = { version = "1.0.95", features = ["backtrace"] } anyhow = { version = "1.0.95", features = ["backtrace"] }
async-trait = "0.1.85" async-trait = "0.1.85"
bstr = "1.11.3" bstr = "1.11.3"
byte-unit = { version = "5.1.6", features = ["serde"] } byte-unit = { version = "5.1.6", default-features = false, features = [
"std",
"byte",
"serde",
] }
bytes = "1.9.0" bytes = "1.9.0"
clap = { version = "4.5.24", features = ["derive", "env"] } clap = { version = "4.5.24", features = ["derive", "env"] }
crossbeam-channel = "0.5.14" crossbeam-channel = "0.5.14"
@@ -136,7 +140,7 @@ reqwest = { version = "0.12.12", features = [
sha-1 = { version = "0.10.1", optional = true } sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.4", optional = true } static-files = { version = "0.2.4", optional = true }
tempfile = { version = "3.15.0", optional = true } tempfile = { version = "3.15.0", optional = true }
zip = { version = "2.3.0", optional = true } zip = { version = "2.2.2", optional = true }
[features] [features]
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"] default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
@@ -166,5 +170,5 @@ german = ["meilisearch-types/german"]
turkish = ["meilisearch-types/turkish"] turkish = ["meilisearch-types/turkish"]
[package.metadata.mini-dashboard] [package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.19/build.zip" assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.18/build.zip"
sha1 = "7974430d5277c97f67cf6e95eec6faaac2788834" sha1 = "b408a30dcb6e20cddb0c153c23385bcac4c8e912"

View File

@@ -198,8 +198,6 @@ struct Infos {
experimental_limit_batched_tasks_total_size: u64, experimental_limit_batched_tasks_total_size: u64,
experimental_network: bool, experimental_network: bool,
experimental_get_task_documents_route: bool, experimental_get_task_documents_route: bool,
experimental_composite_embedders: bool,
experimental_embedding_cache_entries: usize,
gpu_enabled: bool, gpu_enabled: bool,
db_path: bool, db_path: bool,
import_dump: bool, import_dump: bool,
@@ -247,7 +245,6 @@ impl Infos {
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks, experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size, experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
http_addr, http_addr,
master_key: _, master_key: _,
env, env,
@@ -293,7 +290,6 @@ impl Infos {
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
} = features; } = features;
// We're going to override every sensible information. // We're going to override every sensible information.
@@ -313,8 +309,6 @@ impl Infos {
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
experimental_network: network, experimental_network: network,
experimental_get_task_documents_route: get_task_documents_route, experimental_get_task_documents_route: get_task_documents_route,
experimental_composite_embedders: composite_embedders,
experimental_embedding_cache_entries,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"), db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(), import_dump: import_dump.is_some(),
@@ -329,8 +323,7 @@ impl Infos {
http_addr: http_addr != default_http_addr(), http_addr: http_addr != default_http_addr(),
http_payload_size_limit, http_payload_size_limit,
experimental_max_number_of_batched_tasks, experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size: experimental_limit_batched_tasks_total_size,
experimental_limit_batched_tasks_total_size.into(),
task_queue_webhook: task_webhook_url.is_some(), task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(), task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(), log_level: log_level.to_string(),

View File

@@ -34,7 +34,7 @@ use error::PayloadError;
use extractors::payload::PayloadConfig; use extractors::payload::PayloadConfig;
use index_scheduler::versioning::Versioning; use index_scheduler::versioning::Versioning;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::{open_auth_store_env, AuthController}; use meilisearch_auth::AuthController;
use meilisearch_types::milli::constants::VERSION_MAJOR; use meilisearch_types::milli::constants::VERSION_MAJOR;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@@ -228,12 +228,11 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
cleanup_enabled: !opt.experimental_replication_parameters, cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000, max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks, max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(), batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT, index_count: DEFAULT_INDEX_COUNT,
instance_features: opt.to_instance_features(), instance_features: opt.to_instance_features(),
auto_upgrade: opt.experimental_dumpless_upgrade, auto_upgrade: opt.experimental_dumpless_upgrade,
embedding_cache_cap: opt.experimental_embedding_cache_entries,
}; };
let bin_major: u32 = VERSION_MAJOR.parse().unwrap(); let bin_major: u32 = VERSION_MAJOR.parse().unwrap();
let bin_minor: u32 = VERSION_MINOR.parse().unwrap(); let bin_minor: u32 = VERSION_MINOR.parse().unwrap();
@@ -336,12 +335,9 @@ fn open_or_create_database_unchecked(
) -> anyhow::Result<(IndexScheduler, AuthController)> { ) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we // we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later. // wrap our two builders in a closure that'll be executed later.
std::fs::create_dir_all(&index_scheduler_opt.auth_path)?; let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap(); let index_scheduler_builder =
let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key); || -> anyhow::Result<_> { Ok(IndexScheduler::new(index_scheduler_opt, version)?) };
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
};
match ( match (
index_scheduler_builder(), index_scheduler_builder(),
@@ -424,7 +420,6 @@ pub fn update_version_file_for_dumpless_upgrade(
if from_major == 1 && from_minor == 12 { if from_major == 1 && from_minor == 12 {
let env = unsafe { let env = unsafe {
heed::EnvOpenOptions::new() heed::EnvOpenOptions::new()
.read_txn_without_tls()
.max_dbs(Versioning::nb_db()) .max_dbs(Versioning::nb_db())
.map_size(index_scheduler_opt.task_db_size) .map_size(index_scheduler_opt.task_db_size)
.open(&index_scheduler_opt.tasks_path) .open(&index_scheduler_opt.tasks_path)

View File

@@ -63,8 +63,7 @@ const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS"; "MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str = const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE"; "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE";
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms"; const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700"; const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -445,15 +444,7 @@ pub struct Opt {
/// see: <https://github.com/orgs/meilisearch/discussions/801> /// see: <https://github.com/orgs/meilisearch/discussions/801>
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())] #[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
#[serde(default = "default_limit_batched_tasks_total_size")] #[serde(default = "default_limit_batched_tasks_total_size")]
pub experimental_limit_batched_tasks_total_size: Byte, pub experimental_limit_batched_tasks_total_size: u64,
/// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
/// distinct embedder.
///
/// For more information, see <https://github.com/orgs/meilisearch/discussions/818>.
#[clap(long, env = MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES, default_value_t = default_embedding_cache_entries())]
#[serde(default = "default_embedding_cache_entries")]
pub experimental_embedding_cache_entries: usize,
#[serde(flatten)] #[serde(flatten)]
#[clap(flatten)] #[clap(flatten)]
@@ -558,7 +549,6 @@ impl Opt {
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks, experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size, experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
} = self; } = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -651,10 +641,6 @@ impl Opt {
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
experimental_limit_batched_tasks_total_size.to_string(), experimental_limit_batched_tasks_total_size.to_string(),
); );
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
experimental_embedding_cache_entries.to_string(),
);
indexer_options.export_to_env(); indexer_options.export_to_env();
} }
@@ -958,12 +944,8 @@ fn default_limit_batched_tasks() -> usize {
usize::MAX usize::MAX
} }
fn default_limit_batched_tasks_total_size() -> Byte { fn default_limit_batched_tasks_total_size() -> u64 {
Byte::from_u64(u64::MAX) u64::MAX
}
fn default_embedding_cache_entries() -> usize {
0
} }
fn default_snapshot_dir() -> PathBuf { fn default_snapshot_dir() -> PathBuf {
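For reference, one side of this hunk types experimental_limit_batched_tasks_total_size as byte_unit::Byte instead of a raw u64 byte count, the same crate already used for the "10GiB" index growth amount earlier in this compare. A minimal standalone sketch of how such a value parses, using only byte_unit calls that appear in this diff (Byte::from_str, Byte::from_u64, as_u64):

use std::str::FromStr;

use byte_unit::Byte;

fn main() {
    // "10GiB" parses to 10 * 1024^3 bytes; as_u64() yields the raw count
    // that the u64-typed variant of the option expects.
    let limit = Byte::from_str("10GiB").unwrap();
    assert_eq!(limit.as_u64(), 10 * 1024 * 1024 * 1024);

    // Byte::from_u64(u64::MAX) is the "unlimited" default shown above.
    assert_eq!(Byte::from_u64(u64::MAX).as_u64(), u64::MAX);
}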

View File

@@ -52,7 +52,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
contains_filter: Some(false), contains_filter: Some(false),
network: Some(false), network: Some(false),
get_task_documents_route: Some(false), get_task_documents_route: Some(false),
composite_embedders: Some(false),
})), })),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{ {
@@ -95,8 +94,6 @@ pub struct RuntimeTogglableFeatures {
pub network: Option<bool>, pub network: Option<bool>,
#[deserr(default)] #[deserr(default)]
pub get_task_documents_route: Option<bool>, pub get_task_documents_route: Option<bool>,
#[deserr(default)]
pub composite_embedders: Option<bool>,
} }
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures { impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
@@ -108,7 +105,6 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
} = value; } = value;
Self { Self {
@@ -118,7 +114,6 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
contains_filter: Some(contains_filter), contains_filter: Some(contains_filter),
network: Some(network), network: Some(network),
get_task_documents_route: Some(get_task_documents_route), get_task_documents_route: Some(get_task_documents_route),
composite_embedders: Some(composite_embedders),
} }
} }
} }
@@ -131,7 +126,6 @@ pub struct PatchExperimentalFeatureAnalytics {
contains_filter: bool, contains_filter: bool,
network: bool, network: bool,
get_task_documents_route: bool, get_task_documents_route: bool,
composite_embedders: bool,
} }
impl Aggregate for PatchExperimentalFeatureAnalytics { impl Aggregate for PatchExperimentalFeatureAnalytics {
@@ -147,7 +141,6 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
contains_filter: new.contains_filter, contains_filter: new.contains_filter,
network: new.network, network: new.network,
get_task_documents_route: new.get_task_documents_route, get_task_documents_route: new.get_task_documents_route,
composite_embedders: new.composite_embedders,
}) })
} }
@@ -172,7 +165,6 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
contains_filter: Some(false), contains_filter: Some(false),
network: Some(false), network: Some(false),
get_task_documents_route: Some(false), get_task_documents_route: Some(false),
composite_embedders: Some(false),
})), })),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{ {
@@ -210,10 +202,6 @@ async fn patch_features(
.0 .0
.get_task_documents_route .get_task_documents_route
.unwrap_or(old_features.get_task_documents_route), .unwrap_or(old_features.get_task_documents_route),
composite_embedders: new_features
.0
.composite_embedders
.unwrap_or(old_features.composite_embedders),
}; };
// explicitly destructure for analytics rather than using the `Serialize` implementation, because // explicitly destructure for analytics rather than using the `Serialize` implementation, because
@@ -226,7 +214,6 @@ async fn patch_features(
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
} = new_features; } = new_features;
analytics.publish( analytics.publish(
@@ -237,7 +224,6 @@ async fn patch_features(
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
}, },
&req, &req,
); );

View File

@@ -20,13 +20,11 @@ use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::DocumentId; use meilisearch_types::milli::DocumentId;
use meilisearch_types::serde_cs::vec::CS;
use meilisearch_types::star_or::OptionStarOrList; use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{milli, Document, Index}; use meilisearch_types::{milli, Document, Index};
use mime::Mime; use mime::Mime;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use tempfile::tempfile; use tempfile::tempfile;
@@ -45,7 +43,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{ use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
}; };
use crate::search::{parse_filter, ExternalDocumentId, RetrieveVectors}; use crate::search::{parse_filter, RetrieveVectors};
use crate::{aggregate_methods, Opt}; use crate::{aggregate_methods, Opt};
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| { static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
@@ -139,9 +137,6 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
#[serde(rename = "vector.retrieve_vectors")] #[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool, retrieve_vectors: bool,
// maximum size of `ids` array. 0 if always empty or `null`
max_document_ids: usize,
// pagination // pagination
#[serde(rename = "pagination.max_limit")] #[serde(rename = "pagination.max_limit")]
max_limit: usize, max_limit: usize,
@@ -154,7 +149,7 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind { pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool }, PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize }, Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
} }
impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> { impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
@@ -166,18 +161,12 @@ impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
} }
}; };
let ids = match query {
DocumentFetchKind::Normal { ids, .. } => *ids,
DocumentFetchKind::PerDocumentId { .. } => 0,
};
Self { Self {
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit, max_limit: limit,
max_offset: offset, max_offset: offset,
retrieve_vectors, retrieve_vectors,
max_document_ids: ids,
marker: PhantomData, marker: PhantomData,
} }
@@ -196,7 +185,6 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors, retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
max_limit: self.max_limit.max(new.max_limit), max_limit: self.max_limit.max(new.max_limit),
max_offset: self.max_offset.max(new.max_offset), max_offset: self.max_offset.max(new.max_offset),
max_document_ids: self.max_document_ids.max(new.max_document_ids),
marker: PhantomData, marker: PhantomData,
}) })
} }
@@ -278,7 +266,6 @@ pub async fn get_document(
per_filter: false, per_filter: false,
max_limit: 0, max_limit: 0,
max_offset: 0, max_offset: 0,
max_document_ids: 0,
marker: PhantomData, marker: PhantomData,
}, },
&req, &req,
@@ -400,9 +387,6 @@ pub struct BrowseQueryGet {
#[param(default, value_type = Option<bool>)] #[param(default, value_type = Option<bool>)]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)] #[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
retrieve_vectors: Param<bool>, retrieve_vectors: Param<bool>,
#[param(default, value_type = Option<Vec<String>>)]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentIds>)]
ids: Option<CS<String>>,
#[param(default, value_type = Option<String>, example = "popularity > 1000")] #[param(default, value_type = Option<String>, example = "popularity > 1000")]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)] #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>, filter: Option<String>,
@@ -424,9 +408,6 @@ pub struct BrowseQuery {
#[schema(default, example = true)] #[schema(default, example = true)]
#[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)] #[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)]
retrieve_vectors: bool, retrieve_vectors: bool,
#[schema(value_type = Option<Vec<String>>, example = json!(["cody", "finn", "brandy", "gambit"]))]
#[deserr(default, error = DeserrJsonError<InvalidDocumentIds>)]
ids: Option<Vec<serde_json::Value>>,
#[schema(default, value_type = Option<Value>, example = "popularity > 1000")] #[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)] #[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>, filter: Option<Value>,
@@ -498,7 +479,6 @@ pub async fn documents_by_query_post(
retrieve_vectors: body.retrieve_vectors, retrieve_vectors: body.retrieve_vectors,
max_limit: body.limit, max_limit: body.limit,
max_offset: body.offset, max_offset: body.offset,
max_document_ids: body.ids.as_ref().map(Vec::len).unwrap_or_default(),
per_document_id: false, per_document_id: false,
marker: PhantomData, marker: PhantomData,
}, },
@@ -571,8 +551,7 @@ pub async fn get_documents(
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET"); debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } = let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner();
params.into_inner();
let filter = match filter { let filter = match filter {
Some(f) => match serde_json::from_str(&f) { Some(f) => match serde_json::from_str(&f) {
@@ -582,15 +561,12 @@ pub async fn get_documents(
None => None, None => None,
}; };
let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
let query = BrowseQuery { let query = BrowseQuery {
offset: offset.0, offset: offset.0,
limit: limit.0, limit: limit.0,
fields: fields.merge_star_and_none(), fields: fields.merge_star_and_none(),
retrieve_vectors: retrieve_vectors.0, retrieve_vectors: retrieve_vectors.0,
filter, filter,
ids,
}; };
analytics.publish( analytics.publish(
@@ -599,7 +575,6 @@ pub async fn get_documents(
retrieve_vectors: query.retrieve_vectors, retrieve_vectors: query.retrieve_vectors,
max_limit: query.limit, max_limit: query.limit,
max_offset: query.offset, max_offset: query.offset,
max_document_ids: query.ids.as_ref().map(Vec::len).unwrap_or_default(),
per_document_id: false, per_document_id: false,
marker: PhantomData, marker: PhantomData,
}, },
@@ -615,30 +590,15 @@ fn documents_by_query(
query: BrowseQuery, query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query; let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query;
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors); let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);
let ids = if let Some(ids) = ids {
let mut parsed_ids = Vec::with_capacity(ids.len());
for (index, id) in ids.into_iter().enumerate() {
let id = id.try_into().map_err(|error| {
let msg = format!("In `.ids[{index}]`: {error}");
ResponseError::from_msg(msg, Code::InvalidDocumentIds)
})?;
parsed_ids.push(id)
}
Some(parsed_ids)
} else {
None
};
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents( let (total, documents) = retrieve_documents(
&index, &index,
offset, offset,
limit, limit,
ids,
filter, filter,
fields, fields,
retrieve_vectors, retrieve_vectors,
@@ -1491,12 +1451,10 @@ fn some_documents<'a, 't: 'a>(
})) }))
} }
#[allow(clippy::too_many_arguments)]
fn retrieve_documents<S: AsRef<str>>( fn retrieve_documents<S: AsRef<str>>(
index: &Index, index: &Index,
offset: usize, offset: usize,
limit: usize, limit: usize,
ids: Option<Vec<ExternalDocumentId>>,
filter: Option<Value>, filter: Option<Value>,
attributes_to_retrieve: Option<Vec<S>>, attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors, retrieve_vectors: RetrieveVectors,
@@ -1510,28 +1468,16 @@ fn retrieve_documents<S: AsRef<str>>(
None None
}; };
let mut candidates = if let Some(ids) = ids { let candidates = if let Some(filter) = filter {
let external_document_ids = index.external_documents_ids(); filter.evaluate(&rtxn, index).map_err(|err| match err {
let mut candidates = RoaringBitmap::new();
for id in ids.iter() {
let Some(docid) = external_document_ids.get(&rtxn, id)? else {
continue;
};
candidates.insert(docid);
}
candidates
} else {
index.documents_ids(&rtxn)?
};
if let Some(filter) = filter {
candidates &= filter.evaluate(&rtxn, index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter) ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter)
} }
e => e.into(), e => e.into(),
})? })?
} } else {
index.documents_ids(&rtxn)?
};
let (it, number_of_documents) = { let (it, number_of_documents) = {
let number_of_documents = candidates.len(); let number_of_documents = candidates.len();
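The ids branch shown in this hunk resolves each requested external id to an internal docid, collects them in a RoaringBitmap, and then narrows that set with candidates &= filter.evaluate(...). A self-contained sketch of that intersection with the roaring crate imported at the top of this file (the ids and the filter result below are made up):

use roaring::RoaringBitmap;

fn main() {
    // Internal ids resolved from the requested external document ids.
    let mut candidates = RoaringBitmap::new();
    for docid in [0u32, 2, 3] {
        candidates.insert(docid);
    }

    // Ids matching the filter; &= keeps only documents present in both sets.
    let filtered: RoaringBitmap = [2u32, 3, 5].into_iter().collect();
    candidates &= filtered;

    assert_eq!(candidates.iter().collect::<Vec<u32>>(), vec![2, 3]);
}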

View File

@@ -68,8 +68,6 @@ pub struct FacetSearchQuery {
pub ranking_score_threshold: Option<RankingScoreThreshold>, pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)] #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
pub locales: Option<Vec<Locale>>, pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchExhaustiveFacetCount>, default)]
pub exhaustive_facet_count: Option<bool>,
} }
#[derive(Default)] #[derive(Default)]
@@ -100,7 +98,6 @@ impl FacetSearchAggregator {
hybrid, hybrid,
ranking_score_threshold, ranking_score_threshold,
locales, locales,
exhaustive_facet_count,
} = query; } = query;
Self { Self {
@@ -113,8 +110,7 @@ impl FacetSearchAggregator {
|| attributes_to_search_on.is_some() || attributes_to_search_on.is_some()
|| hybrid.is_some() || hybrid.is_some()
|| ranking_score_threshold.is_some() || ranking_score_threshold.is_some()
|| locales.is_some() || locales.is_some(),
|| exhaustive_facet_count.is_some(),
..Default::default() ..Default::default()
} }
} }
@@ -297,24 +293,13 @@ impl From<FacetSearchQuery> for SearchQuery {
hybrid, hybrid,
ranking_score_threshold, ranking_score_threshold,
locales, locales,
exhaustive_facet_count,
} = value; } = value;
// If exhaustive_facet_count is true, we need to set the page to 0
// because the facet search is not exhaustive by default.
let page = if exhaustive_facet_count.map_or(false, |exhaustive| exhaustive) {
// setting the page to 0 will force the search to be exhaustive when computing the number of hits,
// but it will skip the bucket sort saving time.
Some(0)
} else {
None
};
SearchQuery { SearchQuery {
q, q,
offset: DEFAULT_SEARCH_OFFSET(), offset: DEFAULT_SEARCH_OFFSET(),
limit: DEFAULT_SEARCH_LIMIT(), limit: DEFAULT_SEARCH_LIMIT(),
page, page: None,
hits_per_page: None, hits_per_page: None,
attributes_to_retrieve: None, attributes_to_retrieve: None,
retrieve_vectors: false, retrieve_vectors: false,
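The comment in this hunk explains that an exhaustive facet count is obtained by forcing the inner search onto page 0: the hit count becomes exhaustive while the bucket sort is skipped. The mapping on its own, as a minimal sketch (the helper name is made up; the real code lives in the From<FacetSearchQuery> for SearchQuery impl shown above):

fn page_for(exhaustive_facet_count: Option<bool>) -> Option<usize> {
    if exhaustive_facet_count.unwrap_or(false) {
        // Page 0 forces an exhaustive hit count but skips the bucket sort.
        Some(0)
    } else {
        None
    }
}

fn main() {
    assert_eq!(page_for(Some(true)), Some(0));
    assert_eq!(page_for(Some(false)), None);
    assert_eq!(page_for(None), None);
}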

View File

@@ -518,7 +518,7 @@ impl From<index_scheduler::IndexStats> for IndexStats {
.inner_stats .inner_stats
.number_of_documents .number_of_documents
.unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()), .unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()),
raw_document_db_size: stats.inner_stats.documents_database_stats.total_size(), raw_document_db_size: stats.inner_stats.documents_database_stats.total_value_size(),
avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(), avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(),
is_indexing: stats.is_indexing, is_indexing: stats.is_indexing,
number_of_embeddings: stats.inner_stats.number_of_embeddings, number_of_embeddings: stats.inner_stats.number_of_embeddings,

View File

@@ -716,30 +716,7 @@ pub async fn delete_all(
fn validate_settings( fn validate_settings(
settings: Settings<Unchecked>, settings: Settings<Unchecked>,
index_scheduler: &IndexScheduler, _index_scheduler: &IndexScheduler,
) -> Result<Settings<Unchecked>, ResponseError> { ) -> Result<Settings<Unchecked>, ResponseError> {
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbedderSource;
let features = index_scheduler.features();
if let Setting::Set(embedders) = &settings.embedders {
for SettingEmbeddingSettings { inner: embedder } in embedders.values() {
let Setting::Set(embedder) = embedder else {
continue;
};
if matches!(embedder.source, Setting::Set(EmbedderSource::Composite)) {
features.check_composite_embedders("using `\"composite\"` as source")?;
}
if matches!(embedder.search_embedder, Setting::Set(_)) {
features.check_composite_embedders("setting `searchEmbedder`")?;
}
if matches!(embedder.indexing_embedder, Setting::Set(_)) {
features.check_composite_embedders("setting `indexingEmbedder`")?;
}
}
}
Ok(settings.validate()?) Ok(settings.validate()?)
} }

View File

@@ -5,7 +5,7 @@ use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::{ErrorCode as _, ResponseError};
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions; use meilisearch_types::keys::actions;
use meilisearch_types::serde_cs::vec::CS; use meilisearch_types::serde_cs::vec::CS;
@@ -111,7 +111,7 @@ pub async fn similar_get(
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.0.into(); let query = params.0.try_into()?;
let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query); let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query);
@@ -295,8 +295,10 @@ impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
} }
} }
impl From<SimilarQueryGet> for SimilarQuery { impl TryFrom<SimilarQueryGet> for SimilarQuery {
fn from( type Error = ResponseError;
fn try_from(
SimilarQueryGet { SimilarQueryGet {
id, id,
offset, offset,
@@ -309,7 +311,7 @@ impl From<SimilarQueryGet> for SimilarQuery {
embedder, embedder,
ranking_score_threshold, ranking_score_threshold,
}: SimilarQueryGet, }: SimilarQueryGet,
) -> Self { ) -> Result<Self, Self::Error> {
let filter = match filter { let filter = match filter {
Some(f) => match serde_json::from_str(&f) { Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v), Ok(v) => Some(v),
@@ -318,8 +320,10 @@ impl From<SimilarQueryGet> for SimilarQuery {
None => None, None => None,
}; };
SimilarQuery { Ok(SimilarQuery {
id: serde_json::Value::String(id.0), id: id.0.try_into().map_err(|code: InvalidSimilarId| {
ResponseError::from_msg(code.to_string(), code.error_code())
})?,
offset: offset.0, offset: offset.0,
limit: limit.0, limit: limit.0,
filter, filter,
@@ -329,6 +333,6 @@ impl From<SimilarQueryGet> for SimilarQuery {
show_ranking_score: show_ranking_score.0, show_ranking_score: show_ranking_score.0,
show_ranking_score_details: show_ranking_score_details.0, show_ranking_score_details: show_ranking_score_details.0,
ranking_score_threshold: ranking_score_threshold.map(|x| x.0), ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
} })
} }
} }
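One side of this hunk converts the GET parameters with try_into()? because turning the raw id string into a validated ExternalDocumentId can fail with invalid_similar_id, while the other side keeps an infallible From and validates later. A stripped-down sketch of the fallible pattern with placeholder types (the character rule mirrors the document-id error message quoted later in this compare; it is not the exact validation code):

#[derive(Debug)]
struct ValidId(String);

#[derive(Debug)]
struct InvalidId;

struct QueryGet {
    id: String,
}

#[allow(dead_code)]
struct Query {
    id: ValidId,
}

impl TryFrom<String> for ValidId {
    type Error = InvalidId;

    fn try_from(s: String) -> Result<Self, Self::Error> {
        // Placeholder rule: alphanumerics, hyphens and underscores only.
        let ok = !s.is_empty()
            && s.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_');
        if ok {
            Ok(ValidId(s))
        } else {
            Err(InvalidId)
        }
    }
}

impl TryFrom<QueryGet> for Query {
    type Error = InvalidId;

    fn try_from(q: QueryGet) -> Result<Self, Self::Error> {
        // The ? propagates the conversion error, like params.0.try_into()? above.
        Ok(Query { id: q.id.try_into()? })
    }
}

fn main() {
    assert!(Query::try_from(QueryGet { id: "doc_42".into() }).is_ok());
    assert!(Query::try_from(QueryGet { id: "illegal/docid".into() }).is_err());
}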

View File

@@ -340,8 +340,7 @@ impl SearchKind {
vector_len: Option<usize>, vector_len: Option<usize>,
route: Route, route: Route,
) -> Result<(String, Arc<Embedder>, bool), ResponseError> { ) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
let rtxn = index.read_txn()?; let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedder_configs = index.embedding_configs(&rtxn)?;
let embedders = index_scheduler.embedders(index_uid, embedder_configs)?; let embedders = index_scheduler.embedders(index_uid, embedder_configs)?;
let (embedder, _, quantized) = embedders let (embedder, _, quantized) = embedders
@@ -636,7 +635,7 @@ impl SearchQueryWithIndex {
pub struct SimilarQuery { pub struct SimilarQuery {
#[deserr(error = DeserrJsonError<InvalidSimilarId>)] #[deserr(error = DeserrJsonError<InvalidSimilarId>)]
#[schema(value_type = String)] #[schema(value_type = String)]
pub id: serde_json::Value, pub id: ExternalDocumentId,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSimilarOffset>)] #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSimilarOffset>)]
pub offset: usize, pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSimilarLimit>)] #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSimilarLimit>)]
@@ -658,7 +657,8 @@ pub struct SimilarQuery {
pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>, pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq, Deserr)]
#[deserr(try_from(Value) = TryFrom::try_from -> InvalidSimilarId)]
pub struct ExternalDocumentId(String); pub struct ExternalDocumentId(String);
impl AsRef<str> for ExternalDocumentId { impl AsRef<str> for ExternalDocumentId {
@@ -674,7 +674,7 @@ impl ExternalDocumentId {
} }
impl TryFrom<String> for ExternalDocumentId { impl TryFrom<String> for ExternalDocumentId {
type Error = milli::UserError; type Error = InvalidSimilarId;
fn try_from(value: String) -> Result<Self, Self::Error> { fn try_from(value: String) -> Result<Self, Self::Error> {
serde_json::Value::String(value).try_into() serde_json::Value::String(value).try_into()
@@ -682,10 +682,10 @@ impl TryFrom<String> for ExternalDocumentId {
} }
impl TryFrom<Value> for ExternalDocumentId { impl TryFrom<Value> for ExternalDocumentId {
type Error = milli::UserError; type Error = InvalidSimilarId;
fn try_from(value: Value) -> Result<Self, Self::Error> { fn try_from(value: Value) -> Result<Self, Self::Error> {
Ok(Self(milli::documents::validate_document_id_value(value)?)) Ok(Self(milli::documents::validate_document_id_value(value).map_err(|_| InvalidSimilarId)?))
} }
} }
@@ -916,7 +916,7 @@ fn prepare_search<'t>(
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10); let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
embedder embedder
.embed_search(query.q.as_ref().unwrap(), Some(deadline)) .embed_search(query.q.clone().unwrap(), Some(deadline))
.map_err(milli::vector::Error::from) .map_err(milli::vector::Error::from)
.map_err(milli::Error::from)? .map_err(milli::Error::from)?
} }
@@ -1598,11 +1598,6 @@ pub fn perform_similar(
ranking_score_threshold, ranking_score_threshold,
} = query; } = query;
let id: ExternalDocumentId = id.try_into().map_err(|error| {
let msg = format!("Invalid value at `.id`: {error}");
ResponseError::from_msg(msg, Code::InvalidSimilarId)
})?;
// using let-else rather than `?` so that the borrow checker identifies we're always returning here, // using let-else rather than `?` so that the borrow checker identifies we're always returning here,
// preventing a use-after-move // preventing a use-after-move
let Some(internal_id) = index.external_documents_ids().get(&rtxn, &id)? else { let Some(internal_id) = index.external_documents_ids().get(&rtxn, &id)? else {

View File

@@ -281,8 +281,7 @@ async fn test_summarized_document_addition_or_update() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]", ".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r###" @r###"
{ {
@@ -304,8 +303,7 @@ async fn test_summarized_document_addition_or_update() {
"test": 1 "test": 1
}, },
"progressTrace": "[progressTrace]", "progressTrace": "[progressTrace]",
"writeChannelCongestion": "[writeChannelCongestion]", "writeChannelCongestion": "[writeChannelCongestion]"
"internalDatabaseSizes": "[internalDatabaseSizes]"
}, },
"duration": "[duration]", "duration": "[duration]",
"startedAt": "[date]", "startedAt": "[date]",
@@ -324,8 +322,7 @@ async fn test_summarized_document_addition_or_update() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]", ".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r###" @r###"
{ {
@@ -410,8 +407,7 @@ async fn test_summarized_delete_documents_by_batch() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]", ".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r###" @r###"
{ {
@@ -499,8 +495,7 @@ async fn test_summarized_delete_documents_by_filter() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]", ".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r###" @r###"
{ {
@@ -542,8 +537,7 @@ async fn test_summarized_delete_documents_by_filter() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]", ".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r#" @r#"
{ {
@@ -629,8 +623,7 @@ async fn test_summarized_delete_document_by_id() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]", ".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r#" @r#"
{ {
@@ -686,8 +679,7 @@ async fn test_summarized_settings_update() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]", ".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r###" @r###"
{ {

View File

@@ -411,7 +411,7 @@ impl<State> Index<'_, State> {
self.service.get(url).await self.service.get(url).await
} }
pub async fn fetch_documents(&self, payload: Value) -> (Value, StatusCode) { pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref())); let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
self.service.post(url, payload).await self.service.post(url, payload).await
} }

View File

@@ -1897,11 +1897,11 @@ async fn update_documents_with_geo_field() {
}, },
{ {
"id": "3", "id": "3",
"_geo": { "lat": 3, "lng": 0 }, "_geo": { "lat": 1, "lng": 1 },
}, },
{ {
"id": "4", "id": "4",
"_geo": { "lat": "4", "lng": "0" }, "_geo": { "lat": "1", "lng": "1" },
}, },
]); ]);
@@ -1928,7 +1928,9 @@ async fn update_documents_with_geo_field() {
} }
"###); "###);
let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await; let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
// we are expecting docs 4 and 3 first as they have geo // we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@@ -1938,18 +1940,18 @@ async fn update_documents_with_geo_field() {
{ {
"id": "4", "id": "4",
"_geo": { "_geo": {
"lat": "4", "lat": "1",
"lng": "0" "lng": "1"
}, },
"_geoDistance": 667170 "_geoDistance": 5522018
}, },
{ {
"id": "3", "id": "3",
"_geo": { "_geo": {
"lat": 3, "lat": 1,
"lng": 0 "lng": 1
}, },
"_geoDistance": 778364 "_geoDistance": 5522018
}, },
{ {
"id": "1" "id": "1"
@@ -1967,13 +1969,10 @@ async fn update_documents_with_geo_field() {
} }
"###); "###);
let updated_documents = json!([ let updated_documents = json!([{
{ "id": "3",
"id": "3", "doggo": "kefir",
"doggo": "kefir", }]);
"_geo": { "lat": 5, "lng": 0 },
}
]);
let (task, _status_code) = index.update_documents(updated_documents, None).await; let (task, _status_code) = index.update_documents(updated_documents, None).await;
let response = index.wait_task(task.uid()).await; let response = index.wait_task(task.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@@ -2013,16 +2012,16 @@ async fn update_documents_with_geo_field() {
{ {
"id": "3", "id": "3",
"_geo": { "_geo": {
"lat": 5, "lat": 1,
"lng": 0 "lng": 1
}, },
"doggo": "kefir" "doggo": "kefir"
}, },
{ {
"id": "4", "id": "4",
"_geo": { "_geo": {
"lat": "4", "lat": "1",
"lng": "0" "lng": "1"
} }
} }
], ],
@@ -2032,29 +2031,31 @@ async fn update_documents_with_geo_field() {
} }
"###); "###);
let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await; let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
// the search response should not have changed: we are expecting docs 4 and 3 first as they have geo // the search response should not have changed: we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###" @r###"
{ {
"hits": [ "hits": [
{
"id": "3",
"_geo": {
"lat": 5,
"lng": 0
},
"doggo": "kefir",
"_geoDistance": 555975
},
{ {
"id": "4", "id": "4",
"_geo": { "_geo": {
"lat": "4", "lat": "1",
"lng": "0" "lng": "1"
}, },
"_geoDistance": 667170 "_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"doggo": "kefir",
"_geoDistance": 5522018
}, },
{ {
"id": "1" "id": "1"

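The _geoDistance values in these snapshots can be checked by hand: in the version that sorts by _geoPoint(10,0):asc, the sort point and the documents share the 0 meridian, so the distance is simply the latitude difference times the length of one degree of latitude. A quick sketch of that arithmetic, assuming a spherical Earth of roughly 6371 km radius (an approximation, not the exact constant Meilisearch uses):

fn main() {
    // Metres per degree of latitude on a sphere: R * pi / 180, about 111.2 km.
    let metres_per_degree = 6_371_000.0_f64 * std::f64::consts::PI / 180.0;

    // _geoPoint(10, 0) against documents at lat 4 and lat 3 on the same meridian
    // reproduces the 667170 and 778364 metre distances shown above.
    for (lat, expected) in [(4.0_f64, 667_170.0_f64), (3.0, 778_364.0)] {
        let distance = (10.0 - lat) * metres_per_degree;
        assert!((distance - expected).abs() < 5.0);
    }
}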
View File

@@ -157,14 +157,11 @@ async fn delete_document_by_filter() {
index.wait_task(task.uid()).await.succeeded(); index.wait_task(task.uid()).await.succeeded();
let (stats, _) = index.stats().await; let (stats, _) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 4, "numberOfDocuments": 4,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 42,
"avgDocumentSize": "[size]", "avgDocumentSize": 10,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -211,14 +208,11 @@ async fn delete_document_by_filter() {
"###); "###);
let (stats, _) = index.stats().await; let (stats, _) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 16,
"avgDocumentSize": "[size]", "avgDocumentSize": 8,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -284,14 +278,11 @@ async fn delete_document_by_filter() {
"###); "###);
let (stats, _) = index.stats().await; let (stats, _) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 1, "numberOfDocuments": 1,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 12,
"avgDocumentSize": "[size]", "avgDocumentSize": 12,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,

View File

@@ -667,7 +667,7 @@ async fn fetch_document_by_filter() {
.await; .await;
index.wait_task(task.uid()).await.succeeded(); index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.fetch_documents(json!(null)).await; let (response, code) = index.get_document_by_filter(json!(null)).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@@ -678,7 +678,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.fetch_documents(json!({ "offset": "doggo" })).await; let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@@ -689,7 +689,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.fetch_documents(json!({ "limit": "doggo" })).await; let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@@ -700,7 +700,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.fetch_documents(json!({ "fields": "doggo" })).await; let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@@ -711,7 +711,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.fetch_documents(json!({ "filter": true })).await; let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@@ -722,7 +722,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await; let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@@ -733,7 +733,8 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await; let (response, code) =
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@@ -761,7 +762,8 @@ async fn retrieve_vectors() {
"###); "###);
// FETCHALL DOCUMENTS BY POST // FETCHALL DOCUMENTS BY POST
let (response, _code) = index.fetch_documents(json!({ "retrieveVectors": "tamo" })).await; let (response, _code) =
index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`", "message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",

View File

@@ -371,7 +371,7 @@ async fn get_document_by_filter() {
.await; .await;
index.wait_task(task.uid()).await.succeeded(); index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.fetch_documents(json!({})).await; let (response, code) = index.get_document_by_filter(json!({})).await;
let (response2, code2) = index.get_all_documents_raw("").await; let (response2, code2) = index.get_all_documents_raw("").await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
@@ -401,7 +401,7 @@ async fn get_document_by_filter() {
assert_eq!(code, code2); assert_eq!(code, code2);
assert_eq!(response, response2); assert_eq!(response, response2);
let (response, code) = index.fetch_documents(json!({ "filter": "color = blue" })).await; let (response, code) = index.get_document_by_filter(json!({ "filter": "color = blue" })).await;
let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await; let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
@@ -424,8 +424,9 @@ async fn get_document_by_filter() {
assert_eq!(code, code2); assert_eq!(code, code2);
assert_eq!(response, response2); assert_eq!(response, response2);
let (response, code) = let (response, code) = index
index.fetch_documents(json!({ "offset": 1, "limit": 1, "filter": "color != blue" })).await; .get_document_by_filter(json!({ "offset": 1, "limit": 1, "filter": "color != blue" }))
.await;
let (response2, code2) = let (response2, code2) =
index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await; index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@@ -445,7 +446,9 @@ async fn get_document_by_filter() {
assert_eq!(response, response2); assert_eq!(response, response2);
let (response, code) = index let (response, code) = index
.fetch_documents(json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] })) .get_document_by_filter(
json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }),
)
.await; .await;
let (response2, code2) = let (response2, code2) =
index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await; index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await;
@@ -468,7 +471,7 @@ async fn get_document_by_filter() {
// Now testing more complex filter that the get route can't represent // Now testing more complex filter that the get route can't represent
let (response, code) = let (response, code) =
index.fetch_documents(json!({ "filter": [["color = blue", "color = red"]] })).await; index.get_document_by_filter(json!({ "filter": [["color = blue", "color = red"]] })).await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{ {
@@ -492,8 +495,9 @@ async fn get_document_by_filter() {
} }
"###); "###);
let (response, code) = let (response, code) = index
index.fetch_documents(json!({ "filter": [["color != blue"], "color EXISTS"] })).await; .get_document_by_filter(json!({ "filter": [["color != blue"], "color EXISTS"] }))
.await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{ {
@@ -510,326 +514,6 @@ async fn get_document_by_filter() {
"###); "###);
} }
#[actix_rt::test]
async fn get_document_by_ids() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index
.fetch_documents(json!({
"ids": ["0", 1, 2, 3]
}))
.await;
let (response2, code2) = index.get_all_documents_raw("?ids=0,1,2,3").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
},
{
"id": 3
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index.fetch_documents(json!({ "ids": [2, "1"] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=2,1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) =
index.fetch_documents(json!({ "offset": 1, "limit": 1, "ids": ["0", 0, 3] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=3,0&offset=1&limit=1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 3
}
],
"offset": 1,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) =
index.fetch_documents(json!({ "limit": 1, "ids": [0, 3], "fields": ["color"] })).await;
let (response2, code2) = index.get_all_documents_raw("?limit=1&ids=0,3&fields=color").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"color": "red"
}
],
"offset": 0,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
// Now testing more complex requests that the get route can't represent
let (response, code) = index.fetch_documents(json!({ "ids": [] })).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
}
#[actix_rt::test]
async fn get_document_invalid_ids() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
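// A document identifier may only contain alphanumeric characters, hyphens and underscores,
// so an id containing `/` must be rejected with a 400 by both routes.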
let (response, code) = index.fetch_documents(json!({"ids": ["0", "illegal/docid"] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=0,illegal/docid").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"message": "In `.ids[1]`: Document identifier `\"illegal/docid\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_document_ids",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_ids"
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
}
#[actix_rt::test]
async fn get_document_not_found_ids() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.fetch_documents(json!({"ids": ["0", 3, 42] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=0,3,42").await;
// the document with id 42 is not in the results since it doesn't exist
// however, no error is raised
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 3
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
}
#[actix_rt::test]
async fn get_document_by_ids_and_filter() {
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings_filterable_attributes(json!(["color"])).await;
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
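// When `ids` and `filter` are combined, only documents that are both listed in `ids`
// and match the filter are returned.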
let (response, code) =
index.fetch_documents(json!({"ids": [2], "filter": "color = blue" })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=2&filter=color=blue").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.fetch_documents(
json!({ "offset": 1, "limit": 1, "ids": [0, 1, 2, 3], "filter": "color != blue" }),
)
.await;
let (response2, code2) =
index.get_all_documents_raw("?ids=0,1,2,3&filter=color!=blue&offset=1&limit=1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 3
}
],
"offset": 1,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.fetch_documents(json!({ "limit": 1, "ids": [0, 1, 2,3], "filter": "color != blue", "fields": ["color"] }))
.await;
let (response2, code2) =
index.get_all_documents_raw("?ids=0,1,2,3&limit=1&filter=color!=blue&fields=color").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"color": "red"
}
],
"offset": 0,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
// Now testing more complex filter that the get route can't represent
let (response, code) = index
.fetch_documents(json!({ "ids": [0, "2"], "filter": [["color = blue", "color = red"]] }))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (response, code) = index
.fetch_documents(json!({ "filter": [["color != blue"], "color EXISTS"], "ids": [1, 2, 3] }))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
}
#[actix_rt::test] #[actix_rt::test]
async fn get_document_with_vectors() { async fn get_document_with_vectors() {
let server = Server::new().await; let server = Server::new().await;

View File

@@ -28,15 +28,12 @@ async fn import_dump_v1_movie_raw() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 21965,
"avgDocumentSize": "[size]", "avgDocumentSize": 414,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -188,15 +185,12 @@ async fn import_dump_v1_movie_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 21965,
"avgDocumentSize": "[size]", "avgDocumentSize": 414,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -361,15 +355,12 @@ async fn import_dump_v1_rubygems_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 8606,
"avgDocumentSize": "[size]", "avgDocumentSize": 162,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -531,15 +522,12 @@ async fn import_dump_v2_movie_raw() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 21965,
"avgDocumentSize": "[size]", "avgDocumentSize": 414,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -691,15 +679,12 @@ async fn import_dump_v2_movie_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 21965,
"avgDocumentSize": "[size]", "avgDocumentSize": 414,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -861,15 +846,12 @@ async fn import_dump_v2_rubygems_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 8606,
"avgDocumentSize": "[size]", "avgDocumentSize": 162,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -1028,15 +1010,12 @@ async fn import_dump_v3_movie_raw() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 21965,
"avgDocumentSize": "[size]", "avgDocumentSize": 414,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -1188,15 +1167,12 @@ async fn import_dump_v3_movie_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 21965,
"avgDocumentSize": "[size]", "avgDocumentSize": 414,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -1358,15 +1334,12 @@ async fn import_dump_v3_rubygems_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 8606,
"avgDocumentSize": "[size]", "avgDocumentSize": 162,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -1525,15 +1498,12 @@ async fn import_dump_v4_movie_raw() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 21965,
"avgDocumentSize": "[size]", "avgDocumentSize": 414,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -1685,15 +1655,12 @@ async fn import_dump_v4_movie_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 21965,
"avgDocumentSize": "[size]", "avgDocumentSize": 414,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -1855,15 +1822,12 @@ async fn import_dump_v4_rubygems_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 8606,
"avgDocumentSize": "[size]", "avgDocumentSize": 162,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -2030,14 +1994,11 @@ async fn import_dump_v5() {
let (stats, code) = index1.stats().await; let (stats, code) = index1.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 10, "numberOfDocuments": 10,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 6782,
"avgDocumentSize": "[size]", "avgDocumentSize": 678,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -2070,15 +2031,12 @@ async fn import_dump_v5() {
let (stats, code) = index2.stats().await; let (stats, code) = index2.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats, { json_string!(stats),
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 10, "numberOfDocuments": 10,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 6782,
"avgDocumentSize": "[size]", "avgDocumentSize": 678,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -2174,8 +2132,7 @@ async fn import_dump_v6_containing_experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false, "getTaskDocumentsRoute": false
"compositeEmbedders": false
} }
"###); "###);
@@ -2279,7 +2236,6 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
".results[0].duration" => "[date]", ".results[0].duration" => "[date]",
".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.progressTrace" => "[progressTrace]",
".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]",
".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]",
}), name: "batches"); }), name: "batches");
let (indexes, code) = server.list_indexes(None, None).await; let (indexes, code) = server.list_indexes(None, None).await;
@@ -2298,8 +2254,7 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false, "getTaskDocumentsRoute": false
"compositeEmbedders": false
} }
"###); "###);
@@ -2403,8 +2358,7 @@ async fn generate_and_import_dump_containing_vectors() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false, "getTaskDocumentsRoute": false
"compositeEmbedders": false
} }
"###); "###);

View File

@@ -23,8 +23,7 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false, "getTaskDocumentsRoute": false
"compositeEmbedders": false
} }
"###); "###);
@@ -38,8 +37,7 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false, "getTaskDocumentsRoute": false
"compositeEmbedders": false
} }
"###); "###);
@@ -53,8 +51,7 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false, "getTaskDocumentsRoute": false
"compositeEmbedders": false
} }
"###); "###);
@@ -69,8 +66,7 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false, "getTaskDocumentsRoute": false
"compositeEmbedders": false
} }
"###); "###);
@@ -85,8 +81,7 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false, "getTaskDocumentsRoute": false
"compositeEmbedders": false
} }
"###); "###);
} }
@@ -108,8 +103,7 @@ async fn experimental_feature_metrics() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false, "getTaskDocumentsRoute": false
"compositeEmbedders": false
} }
"###); "###);
@@ -144,6 +138,14 @@ async fn experimental_feature_metrics() {
let (response, code) = server.get_metrics().await; let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null"); meili_snap::snapshot!(response, @"null");
// startup without flag respects persisted metrics value
let disable_metrics =
Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
let (response, code) = server_no_flag.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
} }
#[actix_rt::test] #[actix_rt::test]
@@ -156,7 +158,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###" meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{ {
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`", "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`",
"code": "bad_request", "code": "bad_request",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request" "link": "https://docs.meilisearch.com/errors#bad_request"

View File

@@ -615,336 +615,3 @@ async fn facet_search_with_filterable_attributes_rules_errors() {
}, },
).await; ).await;
} }
#[actix_rt::test]
async fn distinct_facet_search_on_movies() {
let server = Server::new().await;
let index = server.index("test");
let documents = json!([
{
"id": 1,
"title": "Carol",
"genres": ["Romance", "Drama", "Blob"],
"color": "crimson"
},
{
"id": 2,
"title": "Wonder Woman",
"genres": ["Action", "Adventure", "Blob"],
"color": "emerald"
},
{
"id": 3,
"title": "Life of Pi",
"genres": ["Adventure", "Drama", "Blob"],
"color": "azure"
},
{
"id": 4,
"title": "Mad Max: Fury Road",
"genres": ["Adventure", "Science Fiction", "Blob"],
"color": "scarlet"
},
{
"id": 5,
"title": "Moana",
"genres": ["Fantasy", "Action", "Blob"],
"color": "coral"
},
{
"id": 6,
"title": "Philadelphia",
"genres": ["Drama", "Blob"],
"color": "navy"
},
{
"id": 7,
"title": "The Matrix",
"genres": ["Science Fiction", "Action", "Blob"],
"color": "onyx"
},
{
"id": 8,
"title": "Inception",
"genres": ["Science Fiction", "Thriller", "Blob"],
"color": "cerulean"
},
{
"id": 9,
"title": "The Shawshank Redemption",
"genres": ["Drama", "Blob"],
"color": "slate"
},
{
"id": 10,
"title": "Pulp Fiction",
"genres": ["Crime", "Drama", "Blob"],
"color": "gold"
},
{
"id": 11,
"title": "The Dark Knight",
"genres": ["Action", "Crime", "Blob"],
"color": "obsidian"
},
{
"id": 12,
"title": "Forrest Gump",
"genres": ["Drama", "Romance", "Blob"],
"color": "jade"
},
{
"id": 13,
"title": "The Godfather",
"genres": ["Crime", "Drama", "Blob"],
"color": "sepia"
},
{
"id": 14,
"title": "Fight Club",
"genres": ["Drama", "Thriller", "Blob"],
"color": "ruby"
},
{
"id": 15,
"title": "Goodfellas",
"genres": ["Crime", "Biography", "Blob"],
"color": "charcoal"
},
{
"id": 16,
"title": "The Silence of the Lambs",
"genres": ["Crime", "Thriller", "Blob"],
"color": "amethyst"
},
{
"id": 17,
"title": "Schindler's List",
"genres": ["Biography", "Drama", "Blob"],
"color": "ebony"
},
{
"id": 18,
"title": "The Lord of the Rings",
"genres": ["Adventure", "Fantasy", "Blob"],
"color": "forest"
},
{
"id": 19,
"title": "Star Wars",
"genres": ["Science Fiction", "Adventure", "Blob"],
"color": "amber"
},
{
"id": 20,
"title": "Jurassic Park",
"genres": ["Adventure", "Science Fiction", "Blob"],
"color": "lime"
},
{
"id": 21,
"title": "Titanic",
"genres": ["Drama", "Romance", "Blob"],
"color": "sapphire"
},
{
"id": 22,
"title": "The Avengers",
"genres": ["Action", "Science Fiction", "Blob"],
"color": "burgundy"
},
{
"id": 23,
"title": "Avatar",
"genres": ["Science Fiction", "Adventure", "Blob"],
"color": "turquoise"
},
{
"id": 24,
"title": "The Green Mile",
"genres": ["Crime", "Fantasy", "Blob"],
"color": "emerald"
},
{
"id": 25,
"title": "Gladiator",
"genres": ["Action", "Drama", "Blob"],
"color": "sepia"
},
{
"id": 26,
"title": "The Departed",
"genres": ["Crime", "Thriller", "Blob"],
"color": "crimson"
},
{
"id": 27,
"title": "Saving Private Ryan",
"genres": ["Drama", "War", "Blob"],
"color": "slate"
},
{
"id": 28,
"title": "Interstellar",
"genres": ["Science Fiction", "Adventure", "Blob"],
"color": "azure"
},
{
"id": 29,
"title": "The Pianist",
"genres": ["Biography", "Drama", "Blob"],
"color": "onyx"
},
{
"id": 30,
"title": "The Usual Suspects",
"genres": ["Crime", "Mystery", "Blob"],
"color": "charcoal"
},
{
"id": 31,
"title": "The Sixth Sense",
"genres": ["Mystery", "Thriller", "Blob"],
"color": "amethyst"
},
{
"id": 32,
"title": "The Princess Bride",
"genres": ["Adventure", "Romance", "Blob"],
"color": "ruby"
},
{
"id": 33,
"title": "Blade Runner",
"genres": ["Science Fiction", "Noir", "Blob"],
"color": "sapphire"
},
{
"id": 34,
"title": "The Big Lebowski",
"genres": ["Comedy", "Crime", "Blob"],
"color": "gold"
},
{
"id": 35,
"title": "Good Will Hunting",
"genres": ["Drama", "Romance", "Blob"],
"color": "turquoise"
},
{
"id": 36,
"title": "The Terminator",
"genres": ["Action", "Science Fiction", "Blob"],
"color": "obsidian"
},
{
"id": 37,
"title": "Casablanca",
"genres": ["Drama", "Romance", "Blob"],
"color": "jade"
},
{
"id": 38,
"title": "The Exorcist",
"genres": ["Horror", "Thriller", "Blob"],
"color": "burgundy"
},
{
"id": 39,
"title": "Apocalypse Now",
"genres": ["Drama", "War", "Blob"],
"color": "forest"
},
{
"id": 40,
"title": "Back to the Future",
"genres": ["Adventure", "Comedy", "Blob"],
"color": "amber"
},
{
"id": 41,
"title": "The Graduate",
"genres": ["Comedy", "Drama", "Blob"],
"color": "azure"
},
{
"id": 42,
"title": "Alien",
"genres": ["Horror", "Science Fiction", "Blob"],
"color": "obsidian"
},
{
"id": 43,
"title": "The Breakfast Club",
"genres": ["Drama", "Comedy", "Blob"],
"color": "coral"
},
{
"id": 44,
"title": "Die Hard",
"genres": ["Action", "Thriller", "Blob"],
"color": "scarlet"
},
{
"id": 45,
"title": "The Sound of Music",
"genres": ["Drama", "Musical", "Blob"],
"color": "emerald"
},
{
"id": 46,
"title": "Jaws",
"genres": ["Horror", "Thriller", "Blob"],
"color": "navy"
},
{
"id": 47,
"title": "Rocky",
"genres": ["Drama", "Sport", "Blob"],
"color": "burgundy"
},
{
"id": 48,
"title": "E.T. the Extra-Terrestrial",
"genres": ["Adventure", "Science Fiction", "Blob"],
"color": "amber"
},
{
"id": 49,
"title": "The Godfather Part II",
"genres": ["Crime", "Drama", "Blob"],
"color": "sepia"
},
{
"id": 50,
"title": "One Flew Over the Cuckoo's Nest",
"genres": ["Drama", "Blob"],
"color": "slate"
}
]);
let (response, code) =
index.update_settings_filterable_attributes(json!(["genres", "color"])).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index.update_settings_distinct_attribute(json!("color")).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;
let (response, code) =
index.facet_search(json!({"facetQuery": "blob", "facetName": "genres", "q": "" })).await;
// The non-exhaustive facet count reports 27 documents for the facet query "blob", even though only 23 documents remain after deduplication on the distinct attribute "color".
assert_eq!(code, 200, "{}", response);
snapshot!(response["facetHits"], @r###"[{"value":"Blob","count":27}]"###);
let (response, code) =
index.facet_search(json!({"facetQuery": "blob", "facetName": "genres", "q": "", "exhaustiveFacetCount": true })).await;
// The exhaustive facet count reports 23 documents for the facet query "blob", which matches the number of distinct colors.
assert_eq!(code, 200, "{}", response);
snapshot!(response["facetHits"], @r###"[{"value":"Blob","count":23}]"###);
}

View File

@@ -1783,146 +1783,6 @@ async fn test_nested_fields() {
.await; .await;
} }
#[actix_rt::test]
async fn test_typo_settings() {
let documents = json!([
{
"id": 0,
"title": "The zeroth document",
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule",
},
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field",
},
{
"prout": "truc",
"machin": "lol",
},
],
},
{
"id": 3,
"title": "The third document",
"nested": "I lied",
},
]);
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({
"searchableAttributes": ["title", "nested.object", "nested.machin"],
"typoTolerance": {
"enabled": true,
"disableOnAttributes": ["title"]
}
}),
&json!({"q": "document"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"title": "The zeroth document"
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field"
},
{
"prout": "truc",
"machin": "lol"
}
]
},
{
"id": 3,
"title": "The third document",
"nested": "I lied"
}
]
"###);
},
)
.await;
// Test prefix search
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({
"searchableAttributes": ["title", "nested.object", "nested.machin"],
"typoTolerance": {
"enabled": true,
"disableOnAttributes": ["title"]
}
}),
&json!({"q": "docume"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"title": "The zeroth document"
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field"
},
{
"prout": "truc",
"machin": "lol"
}
]
},
{
"id": 3,
"title": "The third document",
"nested": "I lied"
}
]
"###);
},
)
.await;
}
/// Modifying facets with different casing should work correctly /// Modifying facets with different casing should work correctly
#[actix_rt::test] #[actix_rt::test]
async fn change_facet_casing() { async fn change_facet_casing() {

View File

@@ -55,11 +55,11 @@ async fn similar_bad_id() {
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await; server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": ["doggo"], "embedder": "manual"})).await; let (response, code) = index.similar_post(json!({"id": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value at `.id`: Document identifier `[\"doggo\"]` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_similar_id", "code": "invalid_similar_id",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id" "link": "https://docs.meilisearch.com/errors#invalid_similar_id"
@@ -145,12 +145,11 @@ async fn similar_invalid_id() {
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await; server.wait_task(response.uid()).await;
let (response, code) = let (response, code) = index.similar_post(json!({"id": "http://invalid-docid/"})).await;
index.similar_post(json!({"id": "http://invalid-docid/", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value at `.id`: Document identifier `\"http://invalid-docid/\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_similar_id", "code": "invalid_similar_id",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id" "link": "https://docs.meilisearch.com/errors#invalid_similar_id"

View File

@@ -110,14 +110,11 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 27,
"avgDocumentSize": "[size]", "avgDocumentSize": 13,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 5, "numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2, "numberOfEmbeddedDocuments": 2,
@@ -138,14 +135,11 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 27,
"avgDocumentSize": "[size]", "avgDocumentSize": 13,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 3, "numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2, "numberOfEmbeddedDocuments": 2,
@@ -166,14 +160,11 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 27,
"avgDocumentSize": "[size]", "avgDocumentSize": 13,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 2, "numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2, "numberOfEmbeddedDocuments": 2,
@@ -195,14 +186,11 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 27,
"avgDocumentSize": "[size]", "avgDocumentSize": 13,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 2, "numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 1, "numberOfEmbeddedDocuments": 1,
@@ -248,14 +236,11 @@ async fn add_remove_embedded_documents() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 27,
"avgDocumentSize": "[size]", "avgDocumentSize": 13,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 5, "numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2, "numberOfEmbeddedDocuments": 2,
@@ -272,14 +257,11 @@ async fn add_remove_embedded_documents() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 1, "numberOfDocuments": 1,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 13,
"avgDocumentSize": "[size]", "avgDocumentSize": 13,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 3, "numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 1, "numberOfEmbeddedDocuments": 1,
@@ -308,14 +290,11 @@ async fn update_embedder_settings() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 108,
"avgDocumentSize": "[size]", "avgDocumentSize": 54,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -347,14 +326,11 @@ async fn update_embedder_settings() {
server.wait_task(response.uid()).await.succeeded(); server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(json_string!(stats), @r###"
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": "[size]", "rawDocumentDbSize": 108,
"avgDocumentSize": "[size]", "avgDocumentSize": 54,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 3, "numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2, "numberOfEmbeddedDocuments": 2,

View File

@@ -43,7 +43,7 @@ async fn version_too_old() {
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap(); std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings }; let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err(); let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.14.0"); snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.13.3");
} }
#[actix_rt::test] #[actix_rt::test]
@@ -58,12 +58,15 @@ async fn version_requires_downgrade() {
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap(); std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings }; let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err(); let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.14.1 is higher than the Meilisearch version 1.14.0. Downgrade is not supported"); snapshot!(err, @"Database version 1.13.4 is higher than the Meilisearch version 1.13.3. Downgrade is not supported");
} }
#[actix_rt::test] #[actix_rt::test]
async fn upgrade_to_the_current_version() { async fn upgrade_to_the_current_version() {
let temp = tempfile::tempdir().unwrap(); let temp = tempfile::tempdir().unwrap();
let server = Server::new_with_options(default_settings(temp.path())).await.unwrap();
drop(server);
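// Reopen the same database, this time with the dumpless upgrade flag enabled.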
let server = Server::new_with_options(Opt { let server = Server::new_with_options(Opt {
experimental_dumpless_upgrade: true, experimental_dumpless_upgrade: true,
..default_settings(temp.path()) ..default_settings(temp.path())

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null, "progress": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.14.0" "upgradeTo": "v1.13.3"
}, },
"stats": { "stats": {
"totalNbTasks": 1, "totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null, "progress": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.14.0" "upgradeTo": "v1.13.3"
}, },
"stats": { "stats": {
"totalNbTasks": 1, "totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null, "progress": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.14.0" "upgradeTo": "v1.13.3"
}, },
"stats": { "stats": {
"totalNbTasks": 1, "totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null, "canceledBy": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.14.0" "upgradeTo": "v1.13.3"
}, },
"error": null, "error": null,
"duration": "[duration]", "duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null, "canceledBy": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.14.0" "upgradeTo": "v1.13.3"
}, },
"error": null, "error": null,
"duration": "[duration]", "duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null, "canceledBy": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.14.0" "upgradeTo": "v1.13.3"
}, },
"error": null, "error": null,
"duration": "[duration]", "duration": "[duration]",

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null, "progress": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.14.0" "upgradeTo": "v1.13.3"
}, },
"stats": { "stats": {
"totalNbTasks": 1, "totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null, "canceledBy": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.14.0" "upgradeTo": "v1.13.3"
}, },
"error": null, "error": null,
"duration": "[duration]", "duration": "[duration]",

View File

@@ -108,10 +108,6 @@ async fn check_the_keys(server: &Server) {
/// 5.2. Enqueue a new task /// 5.2. Enqueue a new task
/// 5.3. Create an index /// 5.3. Create an index
async fn check_the_index_scheduler(server: &Server) { async fn check_the_index_scheduler(server: &Server) {
// Wait until the upgrade has been applied to all indexes to avoid flakiness
let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await;
server.wait_task(Value(tasks["results"][0].clone()).uid()).await.succeeded();
// All the indexes are still present // All the indexes are still present
let (indexes, _) = server.list_indexes(None, None).await; let (indexes, _) = server.list_indexes(None, None).await;
snapshot!(indexes, @r#" snapshot!(indexes, @r#"
@@ -133,9 +129,7 @@ async fn check_the_index_scheduler(server: &Server) {
let (stats, _) = server.stats().await; let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, { assert_json_snapshot!(stats, {
".databaseSize" => "[bytes]", ".databaseSize" => "[bytes]",
".usedDatabaseSize" => "[bytes]", ".usedDatabaseSize" => "[bytes]"
".indexes.kefir.rawDocumentDbSize" => "[bytes]",
".indexes.kefir.avgDocumentSize" => "[bytes]",
}, },
@r###" @r###"
{ {
@@ -145,8 +139,8 @@ async fn check_the_index_scheduler(server: &Server) {
"indexes": { "indexes": {
"kefir": { "kefir": {
"numberOfDocuments": 1, "numberOfDocuments": 1,
"rawDocumentDbSize": "[bytes]", "rawDocumentDbSize": 109,
"avgDocumentSize": "[bytes]", "avgDocumentSize": 109,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -162,6 +156,10 @@ async fn check_the_index_scheduler(server: &Server) {
} }
"###); "###);
// Wait until the upgrade has been applied to all indexes to avoid flakiness
let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await;
server.wait_task(Value(tasks["results"][0].clone()).uid()).await.succeeded();
// Tasks and batches should still work // Tasks and batches should still work
// We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration. // We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration.
// The other tasks should NOT change // The other tasks should NOT change
@@ -195,33 +193,31 @@ async fn check_the_index_scheduler(server: &Server) {
// Tests all the batches query parameters // Tests all the batches query parameters
let (batches, _) = server.batches_filter("uids=10").await; let (batches, _) = server.batches_filter("uids=10").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
let (batches, _) = server.batches_filter("batchUids=10").await; let (batches, _) = server.batches_filter("batchUids=10").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
let (batches, _) = server.batches_filter("statuses=canceled").await; let (batches, _) = server.batches_filter("statuses=canceled").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
// types has already been tested above to retrieve the upgrade database // types has already been tested above to retrieve the upgrade database
let (batches, _) = server.batches_filter("canceledBy=19").await; let (batches, _) = server.batches_filter("canceledBy=19").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
let (stats, _) = server.stats().await; let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, { assert_json_snapshot!(stats, {
".databaseSize" => "[bytes]", ".databaseSize" => "[bytes]",
".usedDatabaseSize" => "[bytes]", ".usedDatabaseSize" => "[bytes]"
".indexes.kefir.rawDocumentDbSize" => "[bytes]",
".indexes.kefir.avgDocumentSize" => "[bytes]",
}, },
@r###" @r###"
{ {
@@ -231,8 +227,8 @@ async fn check_the_index_scheduler(server: &Server) {
"indexes": { "indexes": {
"kefir": { "kefir": {
"numberOfDocuments": 1, "numberOfDocuments": 1,
"rawDocumentDbSize": "[bytes]", "rawDocumentDbSize": 109,
"avgDocumentSize": "[bytes]", "avgDocumentSize": 109,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@@ -249,14 +245,11 @@ async fn check_the_index_scheduler(server: &Server) {
"###); "###);
let index = server.index("kefir"); let index = server.index("kefir");
let (stats, _) = index.stats().await; let (stats, _) = index.stats().await;
snapshot!(json_string!(stats, { snapshot!(stats, @r###"
".rawDocumentDbSize" => "[bytes]",
".avgDocumentSize" => "[bytes]",
}), @r###"
{ {
"numberOfDocuments": 1, "numberOfDocuments": 1,
"rawDocumentDbSize": "[bytes]", "rawDocumentDbSize": 109,
"avgDocumentSize": "[bytes]", "avgDocumentSize": 109,
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,

View File

@@ -164,87 +164,6 @@ async fn add_remove_user_provided() {
"###); "###);
} }
#[actix_rt::test]
async fn user_provide_mismatched_embedding_dimension() {
let server = Server::new().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Invalid vector dimensions in document with id `0` in `._vectors.manual`.\n - note: embedding #0 has dimensions 2\n - note: embedder `manual` requires 3",
"code": "invalid_vector_dimensions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let new_document = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [[0, 0], [1, 1], [2, 2]] }},
]);
let (response, code) = index.add_documents(new_document, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Invalid vector dimensions in document with id `0` in `._vectors.manual`.\n - note: embedding #0 has dimensions 2\n - note: embedder `manual` requires 3",
"code": "invalid_vector_dimensions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
async fn generate_default_user_provided_documents(server: &Server) -> Index { async fn generate_default_user_provided_documents(server: &Server) -> Index {
let index = server.index("doggo"); let index = server.index("doggo");

View File

@@ -1995,7 +1995,7 @@ async fn timeout() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "grand chien de berger des montagnes foil the cache", "q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 0.99, "embedder": "default"} "hybrid": {"semanticRatio": 0.99, "embedder": "default"}
})) }))
.await; .await;

View File

@@ -412,117 +412,6 @@ async fn ollama_url_checks() {
async fn composite_checks() { async fn composite_checks() {
let server = Server::new().await; let server = Server::new().await;
let index = server.index("test"); let index = server.index("test");
// feature not enabled, using source
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "using `\"composite\"` as source requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// feature not enabled, using search embedder
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "userProvided",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
}
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "setting `searchEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// feature not enabled, using indexing embedder
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "userProvided",
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
}
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "setting `indexingEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// enable feature
let (_, code) = server.set_features(json!({"compositeEmbedders": true})).await;
snapshot!(code, @"200 OK");
// inner distribution // inner distribution
let (response, _code) = index let (response, _code) = index
.update_settings(json!({ .update_settings(json!({

View File

@@ -10,6 +10,7 @@ license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.95" anyhow = "1.0.95"
arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
clap = { version = "4.5.24", features = ["derive"] } clap = { version = "4.5.24", features = ["derive"] }
dump = { path = "../dump" } dump = { path = "../dump" }
file-store = { path = "../file-store" } file-store = { path = "../file-store" }

View File

@@ -7,11 +7,11 @@ use anyhow::{bail, Context};
use clap::{Parser, Subcommand, ValueEnum}; use clap::{Parser, Subcommand, ValueEnum};
use dump::{DumpWriter, IndexMetadata}; use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore; use file_store::FileStore;
use meilisearch_auth::{open_auth_store_env, AuthController}; use meilisearch_auth::AuthController;
use meilisearch_types::batches::Batch; use meilisearch_types::batches::Batch;
use meilisearch_types::heed::types::{Bytes, SerdeJson, Str}; use meilisearch_types::heed::types::{Bytes, SerdeJson, Str};
use meilisearch_types::heed::{ use meilisearch_types::heed::{
CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, WithoutTls, CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified,
}; };
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@@ -172,7 +172,7 @@ fn main() -> anyhow::Result<()> {
/// Clears the task queue located at `db_path`. /// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks"); let path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&path) } let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) }
.with_context(|| format!("While trying to open {:?}", path.display()))?; .with_context(|| format!("While trying to open {:?}", path.display()))?;
eprintln!("Deleting tasks from the database..."); eprintln!("Deleting tasks from the database...");
@@ -225,7 +225,7 @@ fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
} }
fn try_opening_database<KC: 'static, DC: 'static>( fn try_opening_database<KC: 'static, DC: 'static>(
env: &Env<WithoutTls>, env: &Env,
rtxn: &RoTxn, rtxn: &RoTxn,
db_name: &str, db_name: &str,
) -> anyhow::Result<Database<KC, DC>> { ) -> anyhow::Result<Database<KC, DC>> {
@@ -235,7 +235,7 @@ fn try_opening_database<KC: 'static, DC: 'static>(
} }
fn try_opening_poly_database( fn try_opening_poly_database(
env: &Env<WithoutTls>, env: &Env,
rtxn: &RoTxn, rtxn: &RoTxn,
db_name: &str, db_name: &str,
) -> anyhow::Result<Database<Unspecified, Unspecified>> { ) -> anyhow::Result<Database<Unspecified, Unspecified>> {
@@ -284,18 +284,13 @@ fn export_a_dump(
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?; FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path) .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Dumping the keys..."); eprintln!("Dumping the keys...");
// 2. dump the keys // 2. dump the keys
let auth_path = db_path.join("auth"); let auth_store = AuthController::new(&db_path, &None)
std::fs::create_dir_all(&auth_path).context("While creating the auth directory")?;
let auth_env = open_auth_store_env(&auth_path).context("While opening the auth store")?;
let auth_store = AuthController::new(auth_env, &None)
.with_context(|| format!("While opening the auth store at {}", db_path.display()))?; .with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
let mut dump_keys = dump.create_keys()?; let mut dump_keys = dump.create_keys()?;
let mut count = 0; let mut count = 0;
@@ -391,10 +386,9 @@ fn export_a_dump(
for result in index_mapping.iter(&rtxn)? { for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?; let (uid, uuid) = result?;
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false) let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
.with_context(|| { format!("While trying to open the index at path {:?}", index_path.display())
format!("While trying to open the index at path {:?}", index_path.display()) })?;
})?;
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let metadata = IndexMetadata { let metadata = IndexMetadata {
@@ -444,10 +438,8 @@ fn export_a_dump(
fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> { fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path) .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let rtxn = env.read_txn()?; let rtxn = env.read_txn()?;
let index_mapping: Database<Str, UuidCodec> = let index_mapping: Database<Str, UuidCodec> =
@@ -464,10 +456,9 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
} }
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false) let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
.with_context(|| { format!("While trying to open the index at path {:?}", index_path.display())
format!("While trying to open the index at path {:?}", index_path.display()) })?;
})?;
eprintln!("Awaiting for a mutable transaction..."); eprintln!("Awaiting for a mutable transaction...");
let _wtxn = index.write_txn().context("While awaiting for a write transaction")?; let _wtxn = index.write_txn().context("While awaiting for a write transaction")?;
@@ -479,7 +470,7 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
eprintln!("Compacting the index..."); eprintln!("Compacting the index...");
let before_compaction = Instant::now(); let before_compaction = Instant::now();
let new_file = index let new_file = index
.copy_to_path(&compacted_index_file_path, CompactionOption::Enabled) .copy_to_file(&compacted_index_file_path, CompactionOption::Enabled)
.with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?; .with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?;
let after_size = new_file.metadata()?.len(); let after_size = new_file.metadata()?.len();
@@ -523,10 +514,8 @@ fn export_documents(
offset: Option<usize>, offset: Option<usize>,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path) .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let rtxn = env.read_txn()?; let rtxn = env.read_txn()?;
let index_mapping: Database<Str, UuidCodec> = let index_mapping: Database<Str, UuidCodec> =
@@ -537,10 +526,9 @@ fn export_documents(
if uid == index_name { if uid == index_name {
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = let index =
Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false) Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
.with_context(|| { format!("While trying to open the index at path {:?}", index_path.display())
format!("While trying to open the index at path {:?}", index_path.display()) })?;
})?;
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&rtxn)?; let fields_ids_map = index.fields_ids_map(&rtxn)?;
@@ -628,10 +616,8 @@ fn hair_dryer(
index_parts: &[IndexPart], index_parts: &[IndexPart],
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path) .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Trying to get a read transaction on the index scheduler..."); eprintln!("Trying to get a read transaction on the index scheduler...");
@@ -644,10 +630,9 @@ fn hair_dryer(
if index_names.iter().any(|i| i == uid) { if index_names.iter().any(|i| i == uid) {
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = let index =
Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false) Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
.with_context(|| { format!("While trying to open the index at path {:?}", index_path.display())
format!("While trying to open the index at path {:?}", index_path.display()) })?;
})?;
eprintln!("Trying to get a read transaction on the {uid} index..."); eprintln!("Trying to get a read transaction on the {uid} index...");

View File

@@ -2,9 +2,7 @@ use std::path::Path;
use anyhow::{bail, Context}; use anyhow::{bail, Context};
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{ use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, WithoutTls,
};
use meilisearch_types::milli::index::{db_name, main_key}; use meilisearch_types::milli::index::{db_name, main_key};
use super::v1_9; use super::v1_9;
@@ -94,7 +92,7 @@ fn update_index_stats(
fn update_date_format( fn update_date_format(
index_uid: &str, index_uid: &str,
index_env: &Env<WithoutTls>, index_env: &Env,
index_wtxn: &mut RwTxn, index_wtxn: &mut RwTxn,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN)
@@ -108,7 +106,7 @@ fn update_date_format(
fn find_rest_embedders( fn find_rest_embedders(
index_uid: &str, index_uid: &str,
index_env: &Env<WithoutTls>, index_env: &Env,
index_txn: &RoTxn, index_txn: &RoTxn,
) -> anyhow::Result<Vec<String>> { ) -> anyhow::Result<Vec<String>> {
let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN)
@@ -166,10 +164,8 @@ pub fn v1_9_to_v1_10(
// 2. REST embedders. We don't support this case right now, so bail // 2. REST embedders. We don't support this case right now, so bail
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path) .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let mut sched_wtxn = env.write_txn()?; let mut sched_wtxn = env.write_txn()?;
@@ -209,13 +205,9 @@ pub fn v1_9_to_v1_10(
let index_env = unsafe { let index_env = unsafe {
// FIXME: fetch the 25 magic number from the index file // FIXME: fetch the 25 magic number from the index file
EnvOpenOptions::new() EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| {
.read_txn_without_tls() format!("while opening index {uid} at '{}'", index_path.display())
.max_dbs(25) })?
.open(&index_path)
.with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display())
})?
}; };
let index_txn = index_env.read_txn().with_context(|| { let index_txn = index_env.read_txn().with_context(|| {
@@ -260,13 +252,9 @@ pub fn v1_9_to_v1_10(
let index_env = unsafe { let index_env = unsafe {
// FIXME: fetch the 25 magic number from the index file // FIXME: fetch the 25 magic number from the index file
EnvOpenOptions::new() EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| {
.read_txn_without_tls() format!("while opening index {uid} at '{}'", index_path.display())
.max_dbs(25) })?
.open(&index_path)
.with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display())
})?
}; };
let mut index_wtxn = index_env.write_txn().with_context(|| { let mut index_wtxn = index_env.write_txn().with_context(|| {

View File

@@ -23,10 +23,8 @@ pub fn v1_10_to_v1_11(
println!("Upgrading from v1.10.0 to v1.11.0"); println!("Upgrading from v1.10.0 to v1.11.0");
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path) .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let sched_rtxn = env.read_txn()?; let sched_rtxn = env.read_txn()?;
@@ -52,13 +50,9 @@ pub fn v1_10_to_v1_11(
); );
let index_env = unsafe { let index_env = unsafe {
EnvOpenOptions::new() EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| {
.read_txn_without_tls() format!("while opening index {uid} at '{}'", index_path.display())
.max_dbs(25) })?
.open(&index_path)
.with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display())
})?
}; };
let index_rtxn = index_env.read_txn().with_context(|| { let index_rtxn = index_env.read_txn().with_context(|| {
@@ -82,11 +76,11 @@ pub fn v1_10_to_v1_11(
try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY) try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY)
.with_context(|| format!("while updating date format for index `{uid}`"))?; .with_context(|| format!("while updating date format for index `{uid}`"))?;
meilisearch_types::milli::arroy::upgrade::cosine_from_0_4_to_0_5( arroy_v04_to_v05::ugrade_from_prev_version(
&index_rtxn, &index_rtxn,
index_read_database.remap_types(), index_read_database,
&mut index_wtxn, &mut index_wtxn,
index_write_database.remap_types(), index_write_database,
)?; )?;
index_wtxn.commit()?; index_wtxn.commit()?;

View File

@@ -115,10 +115,8 @@ fn convert_update_files(db_path: &Path) -> anyhow::Result<()> {
/// Rebuild field distribution as it was wrongly computed in v1.12.x if x < 3 /// Rebuild field distribution as it was wrongly computed in v1.12.x if x < 3
fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> { fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path) .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let mut sched_wtxn = env.write_txn()?; let mut sched_wtxn = env.write_txn()?;
@@ -175,12 +173,11 @@ fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> {
println!("\t- Rebuilding field distribution"); println!("\t- Rebuilding field distribution");
let index = meilisearch_types::milli::Index::new( let index =
EnvOpenOptions::new().read_txn_without_tls(), meilisearch_types::milli::Index::new(EnvOpenOptions::new(), &index_path, false)
&index_path, .with_context(|| {
false, format!("while opening index {uid} at '{}'", index_path.display())
) })?;
.with_context(|| format!("while opening index {uid} at '{}'", index_path.display()))?;
let mut index_txn = index.write_txn()?; let mut index_txn = index.write_txn()?;

View File

@@ -18,7 +18,7 @@ bincode = "1.3.3"
bstr = "1.11.3" bstr = "1.11.3"
bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] } bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0" byteorder = "1.5.0"
charabia = { version = "0.9.3", default-features = false } charabia = { version = "0.9.2", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
convert_case = "0.6.0" convert_case = "0.6.0"
crossbeam-channel = "0.5.14" crossbeam-channel = "0.5.14"
@@ -28,13 +28,11 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7" fst = "0.4.7"
fxhash = "0.2.1" fxhash = "0.2.1"
geoutils = "0.5.1" geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = [ grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] }
"rayon", heed = { version = "0.20.5", default-features = false, features = [
"tempfile",
] }
heed = { version = "0.22.0", default-features = false, features = [
"serde-json", "serde-json",
"serde-bincode", "serde-bincode",
"read-txn-no-tls",
] } ] }
indexmap = { version = "2.7.0", features = ["serde"] } indexmap = { version = "2.7.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" } json-depth-checker = { path = "../json-depth-checker" }
@@ -87,7 +85,7 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838
"no_time", "no_time",
"sync", "sync",
] } ] }
arroy = "0.6.1" arroy = "0.5.0"
rand = "0.8.5" rand = "0.8.5"
tracing = "0.1.41" tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] } ureq = { version = "2.12.1", features = ["json"] }
@@ -103,14 +101,7 @@ uell = "0.1.0"
enum-iterator = "2.1.0" enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" } bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false } flume = { version = "0.11.1", default-features = false }
utoipa = { version = "5.3.1", features = [ utoipa = { version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
"non_strict_integers",
"preserve_order",
"uuid",
"time",
"openapi_extensions",
] }
lru = "0.13.0"
[dev-dependencies] [dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.43", default-features = false }
@@ -122,7 +113,9 @@ meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] } rand = { version = "0.8.5", features = ["small_rng"] }
[features] [features]
all-tokenizations = ["charabia/default"] all-tokenizations = [
"charabia/default",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB # Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml # For more information on this feature, see heed's Cargo.toml

View File

@@ -1,13 +1,8 @@
use std::mem; use heed::types::Bytes;
use heed::Database; use heed::Database;
use heed::DatabaseStat;
use heed::RoTxn; use heed::RoTxn;
use heed::Unspecified;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::BEU32;
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
/// The stats of a database. /// The stats of a database.
@@ -25,24 +20,58 @@ impl DatabaseStats {
/// ///
/// This function iterates over the whole database and computes the stats. /// This function iterates over the whole database and computes the stats.
/// It is not efficient and should be cached somewhere. /// It is not efficient and should be cached somewhere.
pub(crate) fn new( pub(crate) fn new(database: Database<Bytes, Bytes>, rtxn: &RoTxn<'_>) -> heed::Result<Self> {
database: Database<BEU32, Unspecified>, let mut database_stats =
rtxn: &RoTxn<'_>, Self { number_of_entries: 0, total_key_size: 0, total_value_size: 0 };
) -> heed::Result<Self> {
let DatabaseStat { page_size, depth: _, branch_pages, leaf_pages, overflow_pages, entries } =
database.stat(rtxn)?;
// We first take the total size without overflow pages as the overflow pages contains the values and only that. let mut iter = database.iter(rtxn)?;
let total_size = (branch_pages + leaf_pages + overflow_pages) * page_size as usize; while let Some((key, value)) = iter.next().transpose()? {
// We compute an estimated size for the keys. let key_size = key.len() as u64;
let total_key_size = entries * (mem::size_of::<u32>() + 4); let value_size = value.len() as u64;
let total_value_size = total_size - total_key_size; database_stats.total_key_size += key_size;
database_stats.total_value_size += value_size;
}
Ok(Self { database_stats.number_of_entries = database.len(rtxn)?;
number_of_entries: entries as u64,
total_key_size: total_key_size as u64, Ok(database_stats)
total_value_size: total_value_size as u64, }
})
/// Recomputes the stats of the database and returns the new stats.
///
/// This function is used to update the stats of the database when some keys are modified.
/// It is more efficient than the `new` function because it does not iterate over the whole database but only the modified keys comparing the before and after states.
pub(crate) fn recompute<I, K>(
mut stats: Self,
database: Database<Bytes, Bytes>,
before_rtxn: &RoTxn<'_>,
after_rtxn: &RoTxn<'_>,
modified_keys: I,
) -> heed::Result<Self>
where
I: IntoIterator<Item = K>,
K: AsRef<[u8]>,
{
for key in modified_keys {
let key = key.as_ref();
if let Some(value) = database.get(after_rtxn, key)? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
stats.total_key_size = stats.total_key_size.saturating_add(key_size);
stats.total_value_size = stats.total_value_size.saturating_add(value_size);
}
if let Some(value) = database.get(before_rtxn, key)? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
stats.total_key_size = stats.total_key_size.saturating_sub(key_size);
stats.total_value_size = stats.total_value_size.saturating_sub(value_size);
}
}
stats.number_of_entries = database.len(after_rtxn)?;
Ok(stats)
} }
pub fn average_key_size(&self) -> u64 { pub fn average_key_size(&self) -> u64 {
@@ -57,10 +86,6 @@ impl DatabaseStats {
self.number_of_entries self.number_of_entries
} }
pub fn total_size(&self) -> u64 {
self.total_key_size + self.total_value_size
}
pub fn total_key_size(&self) -> u64 { pub fn total_key_size(&self) -> u64 {
self.total_key_size self.total_key_size
} }
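
Aside (not part of the diff): the incremental bookkeeping done by `DatabaseStats::recompute` above can be sketched with plain in-memory maps standing in for the before/after LMDB states. The `Stats` struct and `recompute` function below are simplified stand-ins for illustration, not the actual milli types.

```rust
use std::collections::BTreeMap;

#[derive(Debug, Default, Clone, Copy)]
struct Stats {
    number_of_entries: u64,
    total_key_size: u64,
    total_value_size: u64,
}

// Update the previous stats by looking only at the keys touched by a write
// batch: add the size the entry has after the batch, then subtract the size
// it had before the batch.
fn recompute(
    mut stats: Stats,
    before: &BTreeMap<Vec<u8>, Vec<u8>>,
    after: &BTreeMap<Vec<u8>, Vec<u8>>,
    modified_keys: impl IntoIterator<Item = Vec<u8>>,
) -> Stats {
    for key in modified_keys {
        if let Some(value) = after.get(&key) {
            stats.total_key_size = stats.total_key_size.saturating_add(key.len() as u64);
            stats.total_value_size = stats.total_value_size.saturating_add(value.len() as u64);
        }
        if let Some(value) = before.get(&key) {
            stats.total_key_size = stats.total_key_size.saturating_sub(key.len() as u64);
            stats.total_value_size = stats.total_value_size.saturating_sub(value.len() as u64);
        }
    }
    stats.number_of_entries = after.len() as u64;
    stats
}
```

A key reported as modified but actually unchanged contributes the same amount to both the add and the subtract, so over-reporting only costs lookups; the result matches a full rescan as long as every inserted, updated, or deleted key is included.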

View File

@@ -80,13 +80,9 @@ impl DocumentsBatchIndex {
let mut map = Object::new(); let mut map = Object::new();
for (k, v) in document.iter() { for (k, v) in document.iter() {
let key = self // TODO: TAMO: update the error type
.0 let key =
.get_by_left(&k) self.0.get_by_left(&k).ok_or(crate::error::InternalError::DatabaseClosing)?.clone();
.ok_or(crate::error::InternalError::FieldIdMapMissingEntry(
FieldIdMapMissingEntry::FieldId { field_id: k, process: "recreate_json" },
))?
.clone();
let value = serde_json::from_slice::<serde_json::Value>(v) let value = serde_json::from_slice::<serde_json::Value>(v)
.map_err(crate::error::InternalError::SerdeJson)?; .map_err(crate::error::InternalError::SerdeJson)?;
map.insert(key, value); map.insert(key, value);

View File

@@ -33,6 +33,8 @@ pub enum Error {
#[derive(Error, Debug)] #[derive(Error, Debug)]
pub enum InternalError { pub enum InternalError {
#[error("{}", HeedError::DatabaseClosing)]
DatabaseClosing,
#[error("missing {} in the {db_name} database", key.unwrap_or("key"))] #[error("missing {} in the {db_name} database", key.unwrap_or("key"))]
DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> }, DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
#[error("missing {key} in the fieldids weights mapping")] #[error("missing {key} in the fieldids weights mapping")]
@@ -129,14 +131,6 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidGeoField(#[from] GeoError), InvalidGeoField(#[from] GeoError),
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)] #[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
InvalidVectorDimensions { expected: usize, found: usize }, InvalidVectorDimensions { expected: usize, found: usize },
#[error("Invalid vector dimensions in document with id `{document_id}` in `._vectors.{embedder_name}`.\n - note: embedding #{embedding_index} has dimensions {found}\n - note: embedder `{embedder_name}` requires {expected}")]
InvalidIndexingVectorDimensions {
embedder_name: String,
document_id: String,
embedding_index: usize,
expected: usize,
found: usize,
},
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")] #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
InvalidVectorsMapType { document_id: String, value: Value }, InvalidVectorsMapType { document_id: String, value: Value },
#[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")] #[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]
@@ -203,8 +197,8 @@ and can not be more than 511 bytes.", .document_id.to_string()
valid_fields: BTreeSet<String>, valid_fields: BTreeSet<String>,
hidden_fields: bool, hidden_fields: bool,
}, },
#[error("An LMDB environment is already opened")] #[error("an environment is already opened with different options")]
EnvAlreadyOpened, InvalidLmdbOpenOptions,
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")] #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
SortRankingRuleMissing, SortRankingRuleMissing,
#[error("The database file is in an invalid state.")] #[error("The database file is in an invalid state.")]
@@ -368,8 +362,7 @@ impl From<arroy::Error> for Error {
| arroy::Error::UnmatchingDistance { .. } | arroy::Error::UnmatchingDistance { .. }
| arroy::Error::NeedBuild(_) | arroy::Error::NeedBuild(_)
| arroy::Error::MissingKey { .. } | arroy::Error::MissingKey { .. }
| arroy::Error::MissingMetadata(_) | arroy::Error::MissingMetadata(_) => {
| arroy::Error::CannotDecodeKeyMode { .. } => {
Error::InternalError(InternalError::ArroyError(value)) Error::InternalError(InternalError::ArroyError(value))
} }
} }
@@ -523,7 +516,8 @@ impl From<HeedError> for Error {
// TODO use the encoding // TODO use the encoding
HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })), HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })), HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
HeedError::EnvAlreadyOpened { .. } => UserError(EnvAlreadyOpened), HeedError::DatabaseClosing => InternalError(DatabaseClosing),
HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
} }
} }
} }

View File

@@ -3,9 +3,8 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File; use std::fs::File;
use std::path::Path; use std::path::Path;
use heed::{types::*, DatabaseStat, WithoutTls}; use heed::types::*;
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified}; use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use indexmap::IndexMap;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use rstar::RTree; use rstar::RTree;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -111,7 +110,7 @@ pub mod db_name {
#[derive(Clone)] #[derive(Clone)]
pub struct Index { pub struct Index {
/// The LMDB environment which this index is associated with. /// The LMDB environment which this index is associated with.
pub(crate) env: heed::Env<WithoutTls>, pub(crate) env: heed::Env,
/// Contains many different types (e.g. the fields ids map). /// Contains many different types (e.g. the fields ids map).
pub(crate) main: Database<Unspecified, Unspecified>, pub(crate) main: Database<Unspecified, Unspecified>,
@@ -178,7 +177,7 @@ pub struct Index {
impl Index { impl Index {
pub fn new_with_creation_dates<P: AsRef<Path>>( pub fn new_with_creation_dates<P: AsRef<Path>>(
mut options: heed::EnvOpenOptions<WithoutTls>, mut options: heed::EnvOpenOptions,
path: P, path: P,
created_at: time::OffsetDateTime, created_at: time::OffsetDateTime,
updated_at: time::OffsetDateTime, updated_at: time::OffsetDateTime,
@@ -276,7 +275,7 @@ impl Index {
} }
pub fn new<P: AsRef<Path>>( pub fn new<P: AsRef<Path>>(
options: heed::EnvOpenOptions<WithoutTls>, options: heed::EnvOpenOptions,
path: P, path: P,
creation: bool, creation: bool,
) -> Result<Index> { ) -> Result<Index> {
@@ -285,7 +284,7 @@ impl Index {
} }
fn set_creation_dates( fn set_creation_dates(
env: &heed::Env<WithoutTls>, env: &heed::Env,
main: Database<Unspecified, Unspecified>, main: Database<Unspecified, Unspecified>,
created_at: time::OffsetDateTime, created_at: time::OffsetDateTime,
updated_at: time::OffsetDateTime, updated_at: time::OffsetDateTime,
@@ -307,12 +306,12 @@ impl Index {
} }
/// Create a read transaction to be able to read the index. /// Create a read transaction to be able to read the index.
pub fn read_txn(&self) -> heed::Result<RoTxn<'_, WithoutTls>> { pub fn read_txn(&self) -> heed::Result<RoTxn<'_>> {
self.env.read_txn() self.env.read_txn()
} }
/// Create a static read transaction to be able to read the index without keeping a reference to it. /// Create a static read transaction to be able to read the index without keeping a reference to it.
pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static, WithoutTls>> { pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static>> {
self.env.clone().static_read_txn() self.env.clone().static_read_txn()
} }
@@ -341,12 +340,8 @@ impl Index {
self.env.info().map_size self.env.info().map_size
} }
pub fn copy_to_file(&self, file: &mut File, option: CompactionOption) -> Result<()> { pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
self.env.copy_to_file(file, option).map_err(Into::into) self.env.copy_to_file(path, option).map_err(Into::into)
}
pub fn copy_to_path<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
self.env.copy_to_path(path, option).map_err(Into::into)
} }
/// Returns an `EnvClosingEvent` that can be used to wait for the closing event, /// Returns an `EnvClosingEvent` that can be used to wait for the closing event,
@@ -411,6 +406,38 @@ impl Index {
Ok(count.unwrap_or_default()) Ok(count.unwrap_or_default())
} }
/// Updates the stats of the documents database based on the previous stats and the modified docids.
pub fn update_documents_stats(
&self,
wtxn: &mut RwTxn<'_>,
modified_docids: roaring::RoaringBitmap,
) -> Result<()> {
let before_rtxn = self.read_txn()?;
let document_stats = match self.documents_stats(&before_rtxn)? {
Some(before_stats) => DatabaseStats::recompute(
before_stats,
self.documents.remap_types(),
&before_rtxn,
wtxn,
modified_docids.iter().map(|docid| docid.to_be_bytes()),
)?,
None => {
// This should never happen when there are already documents in the index, the documents stats should be present.
// If it happens, it means that the index was not properly initialized/upgraded.
debug_assert_eq!(
self.documents.len(&before_rtxn)?,
0,
"The documents stats should be present when there are documents in the index"
);
tracing::warn!("No documents stats found, creating new ones");
DatabaseStats::new(self.documents.remap_types(), &*wtxn)?
}
};
self.put_documents_stats(wtxn, document_stats)?;
Ok(())
}
/// Writes the stats of the documents database. /// Writes the stats of the documents database.
pub fn put_documents_stats( pub fn put_documents_stats(
&self, &self,
@@ -1724,122 +1751,6 @@ impl Index {
} }
Ok(stats) Ok(stats)
} }
/// Check if the word is indexed in the index.
///
/// This function checks if the word is indexed in the index by looking at the word_docids and exact_word_docids.
///
/// # Arguments
///
/// * `rtxn`: The read transaction.
/// * `word`: The word to check.
pub fn contains_word(&self, rtxn: &RoTxn<'_>, word: &str) -> Result<bool> {
Ok(self.word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some()
|| self.exact_word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some())
}
/// Returns the sizes in bytes of each of the index database at the given rtxn.
pub fn database_sizes(&self, rtxn: &RoTxn<'_>) -> heed::Result<IndexMap<&'static str, usize>> {
let Self {
env: _,
main,
external_documents_ids,
word_docids,
exact_word_docids,
word_prefix_docids,
exact_word_prefix_docids,
word_pair_proximity_docids,
word_position_docids,
word_fid_docids,
word_prefix_position_docids,
word_prefix_fid_docids,
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_normalized_string_strings,
facet_id_string_fst,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
vector_arroy,
embedder_category_id,
documents,
} = self;
fn compute_size(stats: DatabaseStat) -> usize {
let DatabaseStat {
page_size,
depth: _,
branch_pages,
leaf_pages,
overflow_pages,
entries: _,
} = stats;
(branch_pages + leaf_pages + overflow_pages) * page_size as usize
}
let mut sizes = IndexMap::new();
sizes.insert("main", main.stat(rtxn).map(compute_size)?);
sizes
.insert("external_documents_ids", external_documents_ids.stat(rtxn).map(compute_size)?);
sizes.insert("word_docids", word_docids.stat(rtxn).map(compute_size)?);
sizes.insert("exact_word_docids", exact_word_docids.stat(rtxn).map(compute_size)?);
sizes.insert("word_prefix_docids", word_prefix_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"exact_word_prefix_docids",
exact_word_prefix_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"word_pair_proximity_docids",
word_pair_proximity_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert("word_position_docids", word_position_docids.stat(rtxn).map(compute_size)?);
sizes.insert("word_fid_docids", word_fid_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"word_prefix_position_docids",
word_prefix_position_docids.stat(rtxn).map(compute_size)?,
);
sizes
.insert("word_prefix_fid_docids", word_prefix_fid_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"field_id_word_count_docids",
field_id_word_count_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert("facet_id_f64_docids", facet_id_f64_docids.stat(rtxn).map(compute_size)?);
sizes
.insert("facet_id_string_docids", facet_id_string_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"facet_id_normalized_string_strings",
facet_id_normalized_string_strings.stat(rtxn).map(compute_size)?,
);
sizes.insert("facet_id_string_fst", facet_id_string_fst.stat(rtxn).map(compute_size)?);
sizes
.insert("facet_id_exists_docids", facet_id_exists_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"facet_id_is_null_docids",
facet_id_is_null_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"facet_id_is_empty_docids",
facet_id_is_empty_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"field_id_docid_facet_f64s",
field_id_docid_facet_f64s.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"field_id_docid_facet_strings",
field_id_docid_facet_strings.stat(rtxn).map(compute_size)?,
);
sizes.insert("vector_arroy", vector_arroy.stat(rtxn).map(compute_size)?);
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
Ok(sizes)
}
} }
#[derive(Debug, Deserialize, Serialize)] #[derive(Debug, Deserialize, Serialize)]
@@ -1914,8 +1825,7 @@ pub(crate) mod tests {
impl TempIndex { impl TempIndex {
/// Creates a temporary index /// Creates a temporary index
pub fn new_with_map_size(size: usize) -> Self { pub fn new_with_map_size(size: usize) -> Self {
let options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(size); options.map_size(size);
let _tempdir = TempDir::new_in(".").unwrap(); let _tempdir = TempDir::new_in(".").unwrap();
let inner = Index::new(options, _tempdir.path(), true).unwrap(); let inner = Index::new(options, _tempdir.path(), true).unwrap();

View File

@@ -83,8 +83,6 @@ pub use self::search::{
}; };
pub use self::update::ChannelCongestion; pub use self::update::ChannelCongestion;
pub use arroy;
pub type Result<T> = std::result::Result<T, error::Error>; pub type Result<T> = std::result::Result<T, error::Error>;
pub type Attribute = u32; pub type Attribute = u32;

View File

@@ -1,4 +1,3 @@
use enum_iterator::Sequence;
use std::any::TypeId; use std::any::TypeId;
use std::borrow::Cow; use std::borrow::Cow;
use std::marker::PhantomData; use std::marker::PhantomData;
@@ -77,14 +76,6 @@ impl Progress {
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect() durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
} }
// TODO: ideally we should expose the progress in a way that let arroy use it directly
pub(crate) fn update_progress_from_arroy(&self, progress: arroy::WriterProgress) {
self.update_progress(progress.main);
if let Some(sub) = progress.sub {
self.update_progress(sub);
}
}
} }
/// Generate the names associated with the durations and push them. /// Generate the names associated with the durations and push them.
@@ -190,18 +181,8 @@ macro_rules! make_atomic_progress {
}; };
} }
make_atomic_progress!(Document alias AtomicDocumentStep => "document"); make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload"); make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" );
make_enum_progress! {
pub enum MergingWordCache {
WordDocids,
WordFieldIdDocids,
ExactWordDocids,
WordPositionDocids,
FieldIdWordCountDocids,
}
}
#[derive(Debug, Serialize, Clone, ToSchema)] #[derive(Debug, Serialize, Clone, ToSchema)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
@@ -257,44 +238,3 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
self.total self.total
} }
} }
impl Step for arroy::MainStep {
fn name(&self) -> Cow<'static, str> {
match self {
arroy::MainStep::PreProcessingTheItems => "pre processing the items",
arroy::MainStep::WritingTheDescendantsAndMetadata => {
"writing the descendants and metadata"
}
arroy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items",
arroy::MainStep::RetrievingTheTreeAndItemNodes => "retrieving the tree and item nodes",
arroy::MainStep::UpdatingTheTrees => "updating the trees",
arroy::MainStep::CreateNewTrees => "create new trees",
arroy::MainStep::WritingNodesToDatabase => "writing nodes to database",
arroy::MainStep::DeleteExtraneousTrees => "delete extraneous trees",
arroy::MainStep::WriteTheMetadata => "write the metadata",
}
.into()
}
fn current(&self) -> u32 {
*self as u32
}
fn total(&self) -> u32 {
Self::CARDINALITY as u32
}
}
impl Step for arroy::SubStep {
fn name(&self) -> Cow<'static, str> {
self.unit.into()
}
fn current(&self) -> u32 {
self.current.load(Ordering::Relaxed)
}
fn total(&self) -> u32 {
self.max
}
}
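
Aside (not part of the diff): the removed `Step` implementations all follow the same pattern. The sketch below reproduces it on the `MergingWordCache` enum appearing elsewhere in this diff, with a minimal stand-in for the progress `Step` trait (assumed to have exactly the three methods visible above) and `enum_iterator::Sequence` providing the cardinality; the display names are illustrative.

```rust
use std::borrow::Cow;

use enum_iterator::Sequence;

// Minimal stand-in for the `Step` trait from the progress module.
trait Step {
    fn name(&self) -> Cow<'static, str>;
    fn current(&self) -> u32;
    fn total(&self) -> u32;
}

#[derive(Clone, Copy, Sequence)]
enum MergingWordCache {
    WordDocids,
    WordFieldIdDocids,
    ExactWordDocids,
    WordPositionDocids,
    FieldIdWordCountDocids,
}

impl Step for MergingWordCache {
    fn name(&self) -> Cow<'static, str> {
        match self {
            Self::WordDocids => "word docids",
            Self::WordFieldIdDocids => "word field id docids",
            Self::ExactWordDocids => "exact word docids",
            Self::WordPositionDocids => "word position docids",
            Self::FieldIdWordCountDocids => "field id word count docids",
        }
        .into()
    }

    fn current(&self) -> u32 {
        *self as u32
    }

    fn total(&self) -> u32 {
        Self::CARDINALITY as u32
    }
}
```

In milli itself, the `make_enum_progress!` invocation shown above presumably expands to an equivalent implementation rather than being written by hand.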

View File

@@ -203,7 +203,7 @@ impl<'a> Search<'a> {
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3); let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
match embedder.embed_search(&query, Some(deadline)) { match embedder.embed_search(query, Some(deadline)) {
Ok(embedding) => embedding, Ok(embedding) => embedding,
Err(error) => { Err(error) => {
tracing::error!(error=%error, "Embedding failed"); tracing::error!(error=%error, "Embedding failed");

View File

@@ -173,19 +173,17 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ranking_rule_scores.push(ScoreDetails::Skipped); ranking_rule_scores.push(ScoreDetails::Skipped);
// remove candidates from the universe without adding them to result if their score is below the threshold // remove candidates from the universe without adding them to result if their score is below the threshold
let is_below_threshold = if let Some(ranking_score_threshold) = ranking_score_threshold {
ranking_score_threshold.is_some_and(|ranking_score_threshold| { let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter()); if current_score < ranking_score_threshold {
current_score < ranking_score_threshold all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
}); back!();
continue;
if is_below_threshold { }
all_candidates -= &bucket;
all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
} else {
maybe_add_to_results!(bucket);
} }
maybe_add_to_results!(bucket);
ranking_rule_scores.pop(); ranking_rule_scores.pop();
if cur_ranking_rule_index == 0 { if cur_ranking_rule_index == 0 {
@@ -239,24 +237,23 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
); );
// remove candidates from the universe without adding them to result if their score is below the threshold // remove candidates from the universe without adding them to result if their score is below the threshold
let is_below_threshold = ranking_score_threshold.is_some_and(|ranking_score_threshold| { if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter()); let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
current_score < ranking_score_threshold if current_score < ranking_score_threshold {
}); all_candidates -=
next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates; ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1 if cur_ranking_rule_index == ranking_rules_len - 1
|| (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1) || (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1)
|| cur_offset + (next_bucket.candidates.len() as usize) < from || cur_offset + (next_bucket.candidates.len() as usize) < from
|| is_below_threshold
{ {
if is_below_threshold { maybe_add_to_results!(next_bucket.candidates);
all_candidates -= &next_bucket.candidates;
all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
} else {
maybe_add_to_results!(next_bucket.candidates);
}
ranking_rule_scores.pop(); ranking_rule_scores.pop();
continue; continue;
} }
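
Aside (not part of the diff): both versions of this bucket-sort change implement the same guard, namely discard a bucket instead of returning it when its global ranking score falls below `rankingScoreThreshold`, and both then subtract the discarded candidates from `all_candidates`. The helper below is an illustrative reduction of that check only; the real `ScoreDetails::global_score` aggregates structured score details, so the plain average over a slice of floats used here is a stand-in.

```rust
// Illustrative only: decide whether the current bucket should be dropped.
// `rule_scores` stands in for the per-rule scores accumulated so far, each
// assumed to be normalized to the 0.0..=1.0 range.
fn is_below_threshold(rule_scores: &[f64], ranking_score_threshold: Option<f64>) -> bool {
    ranking_score_threshold.is_some_and(|threshold| {
        // Stand-in for `ScoreDetails::global_score`.
        let global_score = rule_scores.iter().sum::<f64>() / rule_scores.len().max(1) as f64;
        global_score < threshold
    })
}
```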

View File

@@ -1,12 +1,10 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::ops::ControlFlow; use std::ops::ControlFlow;
use fst::automaton::Str; use fst::automaton::Str;
use fst::{IntoStreamer, Streamer}; use fst::{Automaton, IntoStreamer, Streamer};
use heed::types::DecodeIgnore; use heed::types::DecodeIgnore;
use itertools::{merge_join_by, EitherOrBoth};
use super::{OneTypoTerm, Phrase, QueryTerm, ZeroTypoTerm}; use super::{OneTypoTerm, Phrase, QueryTerm, ZeroTypoTerm};
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union}; use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
@@ -18,10 +16,16 @@ use crate::{Result, MAX_WORD_LENGTH};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberOfTypos { pub enum NumberOfTypos {
Zero,
One, One,
Two, Two,
} }
pub enum ZeroOrOneTypo {
Zero,
One,
}
impl Interned<QueryTerm> { impl Interned<QueryTerm> {
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext<'_>) -> Result<()> { pub fn compute_fully_if_needed(self, ctx: &mut SearchContext<'_>) -> Result<()> {
let s = ctx.term_interner.get_mut(self); let s = ctx.term_interner.get_mut(self);
@@ -43,45 +47,34 @@ impl Interned<QueryTerm> {
} }
fn find_zero_typo_prefix_derivations( fn find_zero_typo_prefix_derivations(
ctx: &mut SearchContext<'_>,
word_interned: Interned<String>, word_interned: Interned<String>,
fst: fst::Set<Cow<'_, [u8]>>,
word_interner: &mut DedupInterner<String>,
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>, mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
) -> Result<()> { ) -> Result<()> {
let word = ctx.word_interner.get(word_interned).to_owned(); let word = word_interner.get(word_interned).to_owned();
let word = word.as_str(); let word = word.as_str();
let prefix = Str::new(word).starts_with();
let mut stream = fst.search(prefix).into_stream();
let words = while let Some(derived_word) = stream.next() {
ctx.index.word_docids.remap_data_type::<DecodeIgnore>().prefix_iter(ctx.txn, word)?; let derived_word = std::str::from_utf8(derived_word)?.to_owned();
let exact_words = let derived_word_interned = word_interner.insert(derived_word);
ctx.index.exact_word_docids.remap_data_type::<DecodeIgnore>().prefix_iter(ctx.txn, word)?; if derived_word_interned != word_interned {
let cf = visit(derived_word_interned)?;
for eob in merge_join_by(words, exact_words, |lhs, rhs| match (lhs, rhs) { if cf.is_break() {
(Ok((word, _)), Ok((exact_word, _))) => word.cmp(exact_word), break;
(Err(_), _) | (_, Err(_)) => Ordering::Equal,
}) {
match eob {
EitherOrBoth::Both(kv, _) | EitherOrBoth::Left(kv) | EitherOrBoth::Right(kv) => {
let (derived_word, _) = kv?;
let derived_word = derived_word.to_string();
let derived_word_interned = ctx.word_interner.insert(derived_word);
if derived_word_interned != word_interned {
let cf = visit(derived_word_interned)?;
if cf.is_break() {
break;
}
}
} }
} }
} }
Ok(()) Ok(())
} }
fn find_one_typo_derivations( fn find_zero_one_typo_derivations(
ctx: &mut SearchContext<'_>, ctx: &mut SearchContext<'_>,
word_interned: Interned<String>, word_interned: Interned<String>,
is_prefix: bool, is_prefix: bool,
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>, mut visit: impl FnMut(Interned<String>, ZeroOrOneTypo) -> Result<ControlFlow<()>>,
) -> Result<()> { ) -> Result<()> {
let fst = ctx.get_words_fst()?; let fst = ctx.get_words_fst()?;
let word = ctx.word_interner.get(word_interned).to_owned(); let word = ctx.word_interner.get(word_interned).to_owned();
@@ -96,9 +89,16 @@ fn find_one_typo_derivations(
let derived_word = ctx.word_interner.insert(derived_word.to_owned()); let derived_word = ctx.word_interner.insert(derived_word.to_owned());
let d = dfa.distance(state.1); let d = dfa.distance(state.1);
match d.to_u8() { match d.to_u8() {
0 => (), 0 => {
if derived_word != word_interned {
let cf = visit(derived_word, ZeroOrOneTypo::Zero)?;
if cf.is_break() {
break;
}
}
}
1 => { 1 => {
let cf = visit(derived_word)?; let cf = visit(derived_word, ZeroOrOneTypo::One)?;
if cf.is_break() { if cf.is_break() {
break; break;
} }
@@ -111,7 +111,7 @@ fn find_one_typo_derivations(
Ok(()) Ok(())
} }
fn find_one_two_typo_derivations( fn find_zero_one_two_typo_derivations(
word_interned: Interned<String>, word_interned: Interned<String>,
is_prefix: bool, is_prefix: bool,
fst: fst::Set<Cow<'_, [u8]>>, fst: fst::Set<Cow<'_, [u8]>>,
@@ -144,7 +144,14 @@ fn find_one_two_typo_derivations(
// correct distance // correct distance
let d = second_dfa.distance((state.1).0); let d = second_dfa.distance((state.1).0);
match d.to_u8() { match d.to_u8() {
0 => (), 0 => {
if derived_word_interned != word_interned {
let cf = visit(derived_word_interned, NumberOfTypos::Zero)?;
if cf.is_break() {
break;
}
}
}
1 => { 1 => {
let cf = visit(derived_word_interned, NumberOfTypos::One)?; let cf = visit(derived_word_interned, NumberOfTypos::One)?;
if cf.is_break() { if cf.is_break() {
@@ -187,6 +194,8 @@ pub fn partially_initialized_term_from_word(
}); });
} }
let fst = ctx.index.words_fst(ctx.txn)?;
let use_prefix_db = is_prefix let use_prefix_db = is_prefix
&& (ctx && (ctx
.index .index
@@ -206,19 +215,24 @@ pub fn partially_initialized_term_from_word(
let mut zero_typo = None; let mut zero_typo = None;
let mut prefix_of = BTreeSet::new(); let mut prefix_of = BTreeSet::new();
if ctx.index.contains_word(ctx.txn, word)? { if fst.contains(word) || ctx.index.exact_word_docids.get(ctx.txn, word)?.is_some() {
zero_typo = Some(word_interned); zero_typo = Some(word_interned);
} }
if is_prefix && use_prefix_db.is_none() { if is_prefix && use_prefix_db.is_none() {
find_zero_typo_prefix_derivations(ctx, word_interned, |derived_word| { find_zero_typo_prefix_derivations(
if prefix_of.len() < limits::MAX_PREFIX_COUNT { word_interned,
prefix_of.insert(derived_word); fst,
Ok(ControlFlow::Continue(())) &mut ctx.word_interner,
} else { |derived_word| {
Ok(ControlFlow::Break(())) if prefix_of.len() < limits::MAX_PREFIX_COUNT {
} prefix_of.insert(derived_word);
})?; Ok(ControlFlow::Continue(()))
} else {
Ok(ControlFlow::Break(()))
}
},
)?;
} }
let synonyms = ctx.index.synonyms(ctx.txn)?; let synonyms = ctx.index.synonyms(ctx.txn)?;
let mut synonym_word_count = 0; let mut synonym_word_count = 0;
@@ -281,13 +295,18 @@ impl Interned<QueryTerm> {
let mut one_typo_words = BTreeSet::new(); let mut one_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 { if *max_nbr_typos > 0 {
find_one_typo_derivations(ctx, original, is_prefix, |derived_word| { find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT { match nbr_typos {
one_typo_words.insert(derived_word); ZeroOrOneTypo::Zero => {}
Ok(ControlFlow::Continue(())) ZeroOrOneTypo::One => {
} else { if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
Ok(ControlFlow::Break(())) one_typo_words.insert(derived_word);
} else {
return Ok(ControlFlow::Break(()));
}
}
} }
Ok(ControlFlow::Continue(()))
})?; })?;
} }
@@ -338,7 +357,7 @@ impl Interned<QueryTerm> {
let mut two_typo_words = BTreeSet::new(); let mut two_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 { if *max_nbr_typos > 0 {
find_one_two_typo_derivations( find_zero_one_two_typo_derivations(
*original, *original,
*is_prefix, *is_prefix,
ctx.index.words_fst(ctx.txn)?, ctx.index.words_fst(ctx.txn)?,
@@ -351,6 +370,7 @@ impl Interned<QueryTerm> {
return Ok(ControlFlow::Break(())); return Ok(ControlFlow::Break(()));
} }
match nbr_typos { match nbr_typos {
NumberOfTypos::Zero => {}
NumberOfTypos::One => { NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT { if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word); one_typo_words.insert(derived_word);
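The derivation helpers in this file all follow the same visitor pattern: the caller supplies a closure that receives each derived word together with a marker for how many typos it is away from the original word, and returns a `ControlFlow` so the walk can stop once a cap such as `limits::MAX_ONE_TYPO_COUNT` is reached. Below is a minimal, self-contained sketch of that pattern; the names and the candidate list are illustrative stand-ins for the real interner and FST machinery, and the fallible `Result<ControlFlow<()>>` signature is simplified to a plain `ControlFlow<()>`.

```rust
use std::collections::BTreeSet;
use std::ops::ControlFlow;

#[derive(Clone, Copy)]
enum ZeroOrOneTypo {
    Zero,
    One,
}

// Stand-in for the FST walk: yields candidate words with their typo count
// and lets the visitor stop the enumeration early.
fn visit_derivations(
    candidates: &[(&str, ZeroOrOneTypo)],
    mut visit: impl FnMut(&str, ZeroOrOneTypo) -> ControlFlow<()>,
) {
    for (word, typos) in candidates.iter().copied() {
        if visit(word, typos).is_break() {
            break;
        }
    }
}

fn main() {
    const MAX_ONE_TYPO_COUNT: usize = 2; // illustrative cap

    let candidates = [
        ("hello", ZeroOrOneTypo::Zero),
        ("hallo", ZeroOrOneTypo::One),
        ("hullo", ZeroOrOneTypo::One),
        ("hell", ZeroOrOneTypo::One),
    ];

    let mut one_typo_words = BTreeSet::new();
    visit_derivations(&candidates, |word, typos| {
        match typos {
            // The exact match is recorded elsewhere, so it is skipped here.
            ZeroOrOneTypo::Zero => {}
            ZeroOrOneTypo::One => {
                if one_typo_words.len() < MAX_ONE_TYPO_COUNT {
                    one_typo_words.insert(word.to_owned());
                } else {
                    return ControlFlow::Break(());
                }
            }
        }
        ControlFlow::Continue(())
    });

    assert_eq!(one_typo_words.len(), 2);
}
```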

View File

@@ -15,8 +15,7 @@ use crate::constants::RESERVED_GEO_FIELD_NAME;
pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let path = tempfile::tempdir().unwrap(); let path = tempfile::tempdir().unwrap();
let options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path, true).unwrap(); let index = Index::new(options, &path, true).unwrap();
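For context, this test setup only opens a small LMDB environment through heed's builder. The sketch below shows the plain builder used on the right-hand side, assuming the `heed` and `tempfile` crates; the left-hand side additionally calls `read_txn_without_tls()`, a builder step available only in newer heed releases, so whether you need it is an assumption about the heed version you pin.

```rust
use heed::EnvOpenOptions;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let dir = tempfile::tempdir()?;

    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB, as in the test setup above

    // Opening an environment is unsafe in recent heed releases because the
    // same file must not be memory-mapped twice by one process.
    let env = unsafe { options.open(dir.path())? };

    let rtxn = env.read_txn()?;
    drop(rtxn);
    Ok(())
}
```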

View File

@@ -352,7 +352,7 @@ pub(crate) mod test_helpers {
use grenad::MergerBuilder; use grenad::MergerBuilder;
use heed::types::Bytes; use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn, WithoutTls}; use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::bulk::FacetsUpdateBulkInner; use super::bulk::FacetsUpdateBulkInner;
@@ -390,7 +390,7 @@ pub(crate) mod test_helpers {
for<'a> BoundCodec: for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>, BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{ {
pub env: Env<WithoutTls>, pub env: Env,
pub content: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, pub content: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub group_size: Cell<u8>, pub group_size: Cell<u8>,
pub min_level_size: Cell<u8>, pub min_level_size: Cell<u8>,
@@ -412,8 +412,7 @@ pub(crate) mod test_helpers {
let group_size = group_size.clamp(2, 127); let group_size = group_size.clamp(2, 127);
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127 let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
let options = heed::EnvOpenOptions::new(); let mut options = heed::EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
let options = options.map_size(4096 * 4 * 1000 * 100); let options = options.map_size(4096 * 4 * 1000 * 100);
let tempdir = tempfile::TempDir::new().unwrap(); let tempdir = tempfile::TempDir::new().unwrap();
let env = unsafe { options.open(tempdir.path()) }.unwrap(); let env = unsafe { options.open(tempdir.path()) }.unwrap();
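The clamping of the facet level parameters in this helper can be read directly from the context lines: `group_size` is forced into `2..=127`, `max_group_size` into `2 * group_size..=127`, and `min_level_size` to at least 1. A small standalone worked example of those bounds, using the same arithmetic with an illustrative function name:

```rust
fn clamp_facet_params(group_size: u8, max_group_size: u8, min_level_size: u8) -> (u8, u8, u8) {
    let group_size = group_size.clamp(2, 127);
    // 2 * group_size <= max_group_size <= 127
    let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size));
    // 1 <= min_level_size
    let min_level_size = std::cmp::max(1, min_level_size);
    (group_size, max_group_size, min_level_size)
}

fn main() {
    // Too-small inputs are raised to the minimum bounds.
    assert_eq!(clamp_facet_params(0, 0, 0), (2, 4, 1));
    // A large group size caps max_group_size at 127.
    assert_eq!(clamp_facet_params(100, 10, 5), (100, 127, 5));
}
```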

View File

@@ -28,11 +28,9 @@ pub use self::helpers::*;
pub use self::transform::{Transform, TransformOutput}; pub use self::transform::{Transform, TransformOutput};
use super::facet::clear_facet_levels_based_on_settings_diff; use super::facet::clear_facet_levels_based_on_settings_diff;
use super::new::StdResult; use super::new::StdResult;
use crate::database_stats::DatabaseStats;
use crate::documents::{obkv_to_object, DocumentsBatchReader}; use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError}; use crate::error::{Error, InternalError};
use crate::index::{PrefixSearch, PrefixSettings}; use crate::index::{PrefixSearch, PrefixSettings};
use crate::progress::Progress;
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder; use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
pub use crate::update::index_documents::helpers::CursorClonableMmap; pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{ use crate::update::{
@@ -477,8 +475,7 @@ where
if !settings_diff.settings_update_only { if !settings_diff.settings_update_only {
// Update the stats of the documents database when there is a document update. // Update the stats of the documents database when there is a document update.
let stats = DatabaseStats::new(self.index.documents.remap_data_type(), self.wtxn)?; self.index.update_documents_stats(self.wtxn, modified_docids)?;
self.index.put_documents_stats(self.wtxn, stats)?;
} }
// We write the field distribution into the main database // We write the field distribution into the main database
self.index.put_field_distribution(self.wtxn, &field_distribution)?; self.index.put_field_distribution(self.wtxn, &field_distribution)?;
@@ -523,16 +520,7 @@ where
pool.install(|| { pool.install(|| {
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized); let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
writer.build_and_quantize( writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing, cancel)?;
wtxn,
// In the settings we don't have any progress to share
&Progress::default(),
&mut rng,
dimension,
is_quantizing,
self.indexer_config.max_memory,
cancel,
)?;
Result::Ok(()) Result::Ok(())
}) })
.map_err(InternalError::from)??; .map_err(InternalError::from)??;
@@ -2811,9 +2799,8 @@ mod tests {
embedding_configs.pop().unwrap(); embedding_configs.pop().unwrap();
insta::assert_snapshot!(embedder_name, @"manual"); insta::assert_snapshot!(embedder_name, @"manual");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[0, 1, 2]>"); insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[0, 1, 2]>");
let embedder = std::sync::Arc::new( let embedder =
crate::vector::Embedder::new(embedder.embedder_options, 0).unwrap(), std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
);
let res = index let res = index
.search(&rtxn) .search(&rtxn)
.semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec())) .semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))

View File

@@ -1,6 +1,5 @@
use bumpalo::Bump; use bumpalo::Bump;
use heed::RoTxn; use heed::RoTxn;
use serde_json::Value;
use super::document::{ use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions, Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
@@ -11,7 +10,7 @@ use super::vector_document::{
use crate::attribute_patterns::PatternMatch; use crate::attribute_patterns::PatternMatch;
use crate::documents::FieldIdMapper; use crate::documents::FieldIdMapper;
use crate::vector::EmbeddingConfigs; use crate::vector::EmbeddingConfigs;
use crate::{DocumentId, Index, InternalError, Result}; use crate::{DocumentId, Index, Result};
pub enum DocumentChange<'doc> { pub enum DocumentChange<'doc> {
Deletion(Deletion<'doc>), Deletion(Deletion<'doc>),
@@ -244,29 +243,6 @@ impl<'doc> Update<'doc> {
Ok(has_deleted_fields) Ok(has_deleted_fields)
} }
/// Returns `true` if the geo fields have changed.
pub fn has_changed_for_geo_fields<'t, Mapper: FieldIdMapper>(
&self,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
) -> Result<bool> {
let current = self.current(rtxn, index, mapper)?;
let current_geo = current.geo_field()?;
let updated_geo = self.only_changed_fields().geo_field()?;
match (current_geo, updated_geo) {
(Some(current_geo), Some(updated_geo)) => {
let current: Value =
serde_json::from_str(current_geo.get()).map_err(InternalError::SerdeJson)?;
let updated: Value =
serde_json::from_str(updated_geo.get()).map_err(InternalError::SerdeJson)?;
Ok(current != updated)
}
(None, None) => Ok(false),
_ => Ok(true),
}
}
pub fn only_changed_vectors( pub fn only_changed_vectors(
&self, &self,
doc_alloc: &'doc Bump, doc_alloc: &'doc Bump,
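The `has_changed_for_geo_fields` helper removed in the hunk above boils down to parsing both versions of the geo field as JSON values and comparing them, so differences in key order or whitespace do not count as a change while adding or removing the field does. A self-contained sketch of that comparison, assuming `serde_json`; the real helper operates on the document readers shown above rather than raw strings.

```rust
use serde_json::Value;

/// Returns `true` when the serialized geo field differs between the two versions.
fn geo_field_changed(current: Option<&str>, updated: Option<&str>) -> Result<bool, serde_json::Error> {
    match (current, updated) {
        (Some(current), Some(updated)) => {
            let current: Value = serde_json::from_str(current)?;
            let updated: Value = serde_json::from_str(updated)?;
            Ok(current != updated)
        }
        (None, None) => Ok(false),
        // The field was added or removed: that counts as a change.
        _ => Ok(true),
    }
}

fn main() -> Result<(), serde_json::Error> {
    // Same coordinates, different key order: not a change.
    assert!(!geo_field_changed(
        Some(r#"{"lat": 1.0, "lng": 2.0}"#),
        Some(r#"{"lng": 2.0, "lat": 1.0}"#),
    )?);
    // Dropping the field entirely is a change.
    assert!(geo_field_changed(Some(r#"{"lat": 1.0, "lng": 2.0}"#), None)?);
    Ok(())
}
```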

View File

@@ -117,7 +117,7 @@ impl FacetedDocidsExtractor {
}, },
), ),
DocumentChange::Update(inner) => { DocumentChange::Update(inner) => {
let has_changed = inner.has_changed_for_fields( if !inner.has_changed_for_fields(
&mut |field_name| { &mut |field_name| {
match_faceted_field( match_faceted_field(
field_name, field_name,
@@ -130,10 +130,7 @@ impl FacetedDocidsExtractor {
rtxn, rtxn,
index, index,
context.db_fields_ids_map, context.db_fields_ids_map,
)?; )? {
let has_changed_for_geo_fields =
inner.has_changed_for_geo_fields(rtxn, index, context.db_fields_ids_map)?;
if !has_changed && !has_changed_for_geo_fields {
return Ok(()); return Ok(());
} }

View File

@@ -121,7 +121,6 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
// do we have set embeddings? // do we have set embeddings?
if let Some(embeddings) = new_vectors.embeddings { if let Some(embeddings) = new_vectors.embeddings {
chunks.set_vectors( chunks.set_vectors(
update.external_document_id(),
update.docid(), update.docid(),
embeddings embeddings
.into_vec(&context.doc_alloc, embedder_name) .into_vec(&context.doc_alloc, embedder_name)
@@ -129,7 +128,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
document_id: update.external_document_id().to_string(), document_id: update.external_document_id().to_string(),
error: error.to_string(), error: error.to_string(),
})?, })?,
)?; );
} else if new_vectors.regenerate { } else if new_vectors.regenerate {
let new_rendered = prompt.render_document( let new_rendered = prompt.render_document(
update.external_document_id(), update.external_document_id(),
@@ -210,7 +209,6 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
chunks.set_regenerate(insertion.docid(), new_vectors.regenerate); chunks.set_regenerate(insertion.docid(), new_vectors.regenerate);
if let Some(embeddings) = new_vectors.embeddings { if let Some(embeddings) = new_vectors.embeddings {
chunks.set_vectors( chunks.set_vectors(
insertion.external_document_id(),
insertion.docid(), insertion.docid(),
embeddings embeddings
.into_vec(&context.doc_alloc, embedder_name) .into_vec(&context.doc_alloc, embedder_name)
@@ -220,7 +218,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
.to_string(), .to_string(),
error: error.to_string(), error: error.to_string(),
})?, })?,
)?; );
} else if new_vectors.regenerate { } else if new_vectors.regenerate {
let rendered = prompt.render_document( let rendered = prompt.render_document(
insertion.external_document_id(), insertion.external_document_id(),
@@ -275,7 +273,6 @@ struct Chunks<'a, 'b, 'extractor> {
embedder: &'a Embedder, embedder: &'a Embedder,
embedder_id: u8, embedder_id: u8,
embedder_name: &'a str, embedder_name: &'a str,
dimensions: usize,
prompt: &'a Prompt, prompt: &'a Prompt,
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes, possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>, user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
@@ -300,7 +297,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint(); let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
let texts = BVec::with_capacity_in(capacity, doc_alloc); let texts = BVec::with_capacity_in(capacity, doc_alloc);
let ids = BVec::with_capacity_in(capacity, doc_alloc); let ids = BVec::with_capacity_in(capacity, doc_alloc);
let dimensions = embedder.dimensions();
Self { Self {
texts, texts,
ids, ids,
@@ -313,7 +309,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
embedder_name, embedder_name,
user_provided, user_provided,
has_manual_generation: None, has_manual_generation: None,
dimensions,
} }
} }
@@ -495,25 +490,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
} }
} }
fn set_vectors( fn set_vectors(&self, docid: DocumentId, embeddings: Vec<Embedding>) {
&self,
external_docid: &'a str,
docid: DocumentId,
embeddings: Vec<Embedding>,
) -> Result<()> {
for (embedding_index, embedding) in embeddings.iter().enumerate() {
if embedding.len() != self.dimensions {
return Err(UserError::InvalidIndexingVectorDimensions {
expected: self.dimensions,
found: embedding.len(),
embedder_name: self.embedder_name.to_string(),
document_id: external_docid.to_string(),
embedding_index,
}
.into());
}
}
self.sender.set_vectors(docid, self.embedder_id, embeddings).unwrap(); self.sender.set_vectors(docid, self.embedder_id, embeddings).unwrap();
Ok(())
} }
} }
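The `set_vectors` body removed here validated every user-provided embedding against the dimension count announced by the embedder before forwarding the vectors to the writer. A stripped-down sketch of that check, with an illustrative error type standing in for the crate's `UserError::InvalidIndexingVectorDimensions`:

```rust
#[derive(Debug)]
struct InvalidVectorDimensions {
    expected: usize,
    found: usize,
    embedding_index: usize,
}

/// Checks that every embedding has the expected number of dimensions.
fn check_dimensions(
    embeddings: &[Vec<f32>],
    expected: usize,
) -> Result<(), InvalidVectorDimensions> {
    for (embedding_index, embedding) in embeddings.iter().enumerate() {
        if embedding.len() != expected {
            return Err(InvalidVectorDimensions {
                expected,
                found: embedding.len(),
                embedding_index,
            });
        }
    }
    Ok(())
}

fn main() {
    let ok = vec![vec![0.0_f32; 3], vec![1.0; 3]];
    assert!(check_dimensions(&ok, 3).is_ok());

    let bad = vec![vec![0.0_f32; 3], vec![1.0; 2]];
    let err = check_dimensions(&bad, 3).unwrap_err();
    assert_eq!((err.expected, err.found, err.embedding_index), (3, 2, 1));
}
```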

View File

@@ -3,7 +3,7 @@ use std::sync::atomic::Ordering;
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
use bumpalo::Bump; use bumpalo::Bump;
use heed::{RoTxn, WithoutTls}; use heed::RoTxn;
use rayon::iter::IndexedParallelIterator; use rayon::iter::IndexedParallelIterator;
use super::super::document_change::DocumentChange; use super::super::document_change::DocumentChange;
@@ -28,7 +28,7 @@ pub struct DocumentChangeContext<
/// inside of the DB. /// inside of the DB.
pub db_fields_ids_map: &'indexer FieldsIdsMap, pub db_fields_ids_map: &'indexer FieldsIdsMap,
/// A transaction providing data from the DB before all indexing operations /// A transaction providing data from the DB before all indexing operations
pub rtxn: RoTxn<'indexer, WithoutTls>, pub rtxn: RoTxn<'indexer>,
/// Global field id map that is up to date with the current state of the indexing process. /// Global field id map that is up to date with the current state of the indexing process.
/// ///

View File

@@ -13,7 +13,6 @@ use super::super::thread_local::{FullySend, ThreadLocal};
use super::super::FacetFieldIdsDelta; use super::super::FacetFieldIdsDelta;
use super::document_changes::{extract, DocumentChanges, IndexingContext}; use super::document_changes::{extract, DocumentChanges, IndexingContext};
use crate::index::IndexEmbeddingConfig; use crate::index::IndexEmbeddingConfig;
use crate::progress::MergingWordCache;
use crate::proximity::ProximityPrecision; use crate::proximity::ProximityPrecision;
use crate::update::new::extract::EmbeddingExtractor; use crate::update::new::extract::EmbeddingExtractor;
use crate::update::new::merger::merge_and_send_rtree; use crate::update::new::merger::merge_and_send_rtree;
@@ -97,7 +96,6 @@ where
{ {
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted"); let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
let _entered = span.enter(); let _entered = span.enter();
indexing_context.progress.update_progress(IndexingStep::MergingFacetCaches);
facet_field_ids_delta = merge_and_send_facet_docids( facet_field_ids_delta = merge_and_send_facet_docids(
caches, caches,
@@ -119,6 +117,7 @@ where
} = { } = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter(); let _entered = span.enter();
WordDocidsExtractors::run_extraction( WordDocidsExtractors::run_extraction(
document_changes, document_changes,
indexing_context, indexing_context,
@@ -127,13 +126,9 @@ where
)? )?
}; };
indexing_context.progress.update_progress(IndexingStep::MergingWordCaches);
{ {
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter(); let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordDocids);
merge_and_send_docids( merge_and_send_docids(
word_docids, word_docids,
index.word_docids.remap_types(), index.word_docids.remap_types(),
@@ -147,8 +142,6 @@ where
let span = let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter(); let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordFieldIdDocids);
merge_and_send_docids( merge_and_send_docids(
word_fid_docids, word_fid_docids,
index.word_fid_docids.remap_types(), index.word_fid_docids.remap_types(),
@@ -162,8 +155,6 @@ where
let span = let span =
tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter(); let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::ExactWordDocids);
merge_and_send_docids( merge_and_send_docids(
exact_word_docids, exact_word_docids,
index.exact_word_docids.remap_types(), index.exact_word_docids.remap_types(),
@@ -177,8 +168,6 @@ where
let span = let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter(); let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordPositionDocids);
merge_and_send_docids( merge_and_send_docids(
word_position_docids, word_position_docids,
index.word_position_docids.remap_types(), index.word_position_docids.remap_types(),
@@ -192,8 +181,6 @@ where
let span = let span =
tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter(); let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::FieldIdWordCountDocids);
merge_and_send_docids( merge_and_send_docids(
fid_word_count_docids, fid_word_count_docids,
index.field_id_word_count_docids.remap_types(), index.field_id_word_count_docids.remap_types(),
@@ -223,7 +210,6 @@ where
{ {
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids"); let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
let _entered = span.enter(); let _entered = span.enter();
indexing_context.progress.update_progress(IndexingStep::MergingWordProximity);
merge_and_send_docids( merge_and_send_docids(
caches, caches,
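Independent of the progress-reporting calls being removed in this file, each merge phase stays wrapped in a tracing span so profiles can attribute time per step. A minimal sketch of that pattern, assuming the `tracing` crate; the merge body itself is stubbed out and spans are no-ops unless a subscriber is installed.

```rust
fn merge_word_docids() {
    // Same target/name style as the hunks above.
    let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
    let _entered = span.enter();

    // ... the actual merge_and_send_docids work would run here ...
}

fn main() {
    merge_word_docids();
}
```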

Some files were not shown because too many files have changed in this diff.