Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-12-21 11:56:57 +00:00)

Compare commits: v1.11.3 ... control-be (1 commit)
Commit: 920348ffa8
Cargo.lock (generated): 66 changed lines
@@ -386,16 +386,15 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"

 [[package]]
 name = "arroy"
-version = "0.5.0"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfc5f272f38fa063bbff0a7ab5219404e221493de005e2b4078c62d626ef567e"
+checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
 dependencies = [
  "bytemuck",
  "byteorder",
  "heed",
  "log",
  "memmap2",
- "nohash",
  "ordered-float",
  "rand",
  "rayon",
@@ -472,7 +471,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"

 [[package]]
 name = "benchmarks"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "bytes",
@@ -653,7 +652,7 @@ dependencies = [

 [[package]]
 name = "build-info"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "time",
@@ -934,9 +933,9 @@ dependencies = [

 [[package]]
 name = "charabia"
-version = "0.9.1"
+version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55ff52497324e7d168505a16949ae836c14595606fab94687238d2f6c8d4c798"
+checksum = "03cd8f290cae94934cdd0103c14c2de9faf2d7d85be0d24d511af2bf1b14119d"
 dependencies = [
  "aho-corasick",
  "csv",
@@ -1623,7 +1622,7 @@ dependencies = [

 [[package]]
 name = "dump"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1835,7 +1834,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"

 [[package]]
 name = "file-store"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "tempfile",
  "thiserror",
@@ -1857,7 +1856,7 @@ dependencies = [

 [[package]]
 name = "filter-parser"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "insta",
  "nom",
@@ -1877,7 +1876,7 @@ dependencies = [

 [[package]]
 name = "flatten-serde-json"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -2001,7 +2000,7 @@ dependencies = [

 [[package]]
 name = "fuzzers"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "arbitrary",
  "clap",
@@ -2553,7 +2552,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"

 [[package]]
 name = "index-scheduler"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "arroy",
@@ -2747,7 +2746,7 @@ dependencies = [

 [[package]]
 name = "json-depth-checker"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -2839,7 +2838,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d"
 dependencies = [
  "cfg-if",
- "windows-targets 0.52.4",
+ "windows-targets 0.48.1",
 ]

 [[package]]
@@ -3366,7 +3365,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "meili-snap"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "insta",
  "md5",
@@ -3375,7 +3374,7 @@ dependencies = [

 [[package]]
 name = "meilisearch"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -3415,7 +3414,6 @@ dependencies = [
  "meilisearch-types",
  "mimalloc",
  "mime",
- "mopa-maintained",
  "num_cpus",
  "obkv",
  "once_cell",
@@ -3465,7 +3463,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-auth"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "base64 0.22.1",
  "enum-iterator",
@@ -3484,7 +3482,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-types"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -3514,7 +3512,7 @@ dependencies = [

 [[package]]
 name = "meilitool"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "clap",
@@ -3545,7 +3543,7 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "arroy",
  "big_s",
@@ -3682,24 +3680,12 @@ dependencies = [
  "syn 2.0.60",
 ]

-[[package]]
-name = "mopa-maintained"
-version = "0.2.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "79b7f3e22167862cc7c95b21a6f326c22e4bf40da59cbf000b368a310173ba11"
-
 [[package]]
 name = "mutually_exclusive_features"
 version = "0.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"

-[[package]]
-name = "nohash"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca"
-
 [[package]]
 name = "nom"
 version = "7.1.3"
@@ -3991,7 +3977,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

 [[package]]
 name = "permissive-json-pointer"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "big_s",
  "serde_json",
@@ -4589,8 +4575,9 @@ dependencies = [

 [[package]]
 name = "rhai"
-version = "1.20.0"
-source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61797318be89b1a268a018a92a7657096d83f3ecb31418b9e9c16dcbb043b702"
 dependencies = [
  "ahash 0.8.11",
  "bitflags 2.6.0",
@@ -4607,7 +4594,8 @@ dependencies = [
 [[package]]
 name = "rhai_codegen"
 version = "2.2.0"
-source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5a11a05ee1ce44058fa3d5961d05194fdbe3ad6b40f904af764d81b86450e6b"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -6380,7 +6368,7 @@ dependencies = [

 [[package]]
 name = "xtask"
-version = "1.11.3"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "build-info",
@@ -22,7 +22,7 @@ members = [
 ]

 [workspace.package]
-version = "1.11.3"
+version = "1.11.0"
 authors = [
  "Quentin de Quelen <quentin@dequelen.me>",
  "Clément Renault <clement@meilisearch.com>",
@@ -45,14 +45,14 @@ See the list of all our example apps in our [demos repository](https://github.co
 ## ✨ Features
 - **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
 - **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
-- **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
+- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
 - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
 - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
-- **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
+- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
 - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
 - **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
 - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
-- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
+- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
 - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
 - **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
 - **Easy to install, deploy, and maintain**
@@ -255,8 +255,6 @@ pub(crate) mod test {
         }
         "###);

-        insta::assert_json_snapshot!(vector_index.settings().unwrap());
-
         {
             let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
             let mut documents = documents.unwrap();
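The removed `insta::assert_json_snapshot!` call is what drives the snapshot-file churn in the next hunks: insta persists each asserted value in a `.snap` file beside the test, so dropping or changing an assertion rewrites or orphans the stored snapshot. A minimal sketch of the mechanism (assuming the insta and serde_json crates; the value here is illustrative, not the real settings type):

#[cfg(test)]
mod snapshot_demo {
    #[test]
    fn settings_snapshot() {
        // Any serde-serializable value can be snapshotted.
        let settings = serde_json::json!({ "displayedAttributes": ["*"] });
        // The first run records a `.snap` file next to the test; later runs
        // compare against it and fail on any difference. Deleting this
        // assertion is what leaves the stored snapshot behind.
        insta::assert_json_snapshot!(settings);
    }
}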
@@ -1,56 +1,783 @@
|
|||||||
---
|
---
|
||||||
source: dump/src/reader/mod.rs
|
source: dump/src/reader/mod.rs
|
||||||
expression: vector_index.settings().unwrap()
|
expression: document
|
||||||
---
|
---
|
||||||
{
|
{
|
||||||
"displayedAttributes": [
|
"id": "e3",
|
||||||
"*"
|
"desc": "overriden vector + map",
|
||||||
|
"_vectors": {
|
||||||
|
"default": [
|
||||||
|
0.2,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1
|
||||||
],
|
],
|
||||||
"searchableAttributes": [
|
"toto": [
|
||||||
"*"
|
0.1
|
||||||
],
|
]
|
||||||
"filterableAttributes": [],
|
|
||||||
"sortableAttributes": [],
|
|
||||||
"rankingRules": [
|
|
||||||
"words",
|
|
||||||
"typo",
|
|
||||||
"proximity",
|
|
||||||
"attribute",
|
|
||||||
"sort",
|
|
||||||
"exactness"
|
|
||||||
],
|
|
||||||
"stopWords": [],
|
|
||||||
"nonSeparatorTokens": [],
|
|
||||||
"separatorTokens": [],
|
|
||||||
"dictionary": [],
|
|
||||||
"synonyms": {},
|
|
||||||
"distinctAttribute": null,
|
|
||||||
"proximityPrecision": "byWord",
|
|
||||||
"typoTolerance": {
|
|
||||||
"enabled": true,
|
|
||||||
"minWordSizeForTypos": {
|
|
||||||
"oneTypo": 5,
|
|
||||||
"twoTypos": 9
|
|
||||||
},
|
|
||||||
"disableOnWords": [],
|
|
||||||
"disableOnAttributes": []
|
|
||||||
},
|
|
||||||
"faceting": {
|
|
||||||
"maxValuesPerFacet": 100,
|
|
||||||
"sortFacetValuesBy": {
|
|
||||||
"*": "alpha"
|
|
||||||
}
|
}
|
||||||
},
|
|
||||||
"pagination": {
|
|
||||||
"maxTotalHits": 1000
|
|
||||||
},
|
|
||||||
"embedders": {
|
|
||||||
"default": {
|
|
||||||
"source": "huggingFace",
|
|
||||||
"model": "BAAI/bge-base-en-v1.5",
|
|
||||||
"revision": "617ca489d9e86b49b8167676d8220688b99db36e",
|
|
||||||
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"searchCutoffMs": null
|
|
||||||
}
|
}
|
||||||
(3 file diffs suppressed because they are too large)
@@ -1,780 +0,0 @@
----
-source: dump/src/reader/mod.rs
-expression: document
----
-{
-  "id": "e0",
-  "desc": "overriden vector",
-  "_vectors": {
-    "default": [
-      0.1,
-      (… several hundred repeated 0.1 entries elided …)
-      0.1
-    ]
-  }
-}
@@ -40,7 +40,7 @@ ureq = "2.10.0"
 uuid = { version = "1.10.0", features = ["serde", "v4"] }

 [dev-dependencies]
-arroy = "0.5.0"
+arroy = "0.4.0"
 big_s = "1.0.2"
 crossbeam = "0.8.4"
 insta = { version = "1.39.0", features = ["json", "redactions"] }
@@ -1263,7 +1263,7 @@ impl IndexScheduler {
         #[cfg(test)]
         self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;

-        tracing::error!("Batch failed {}", error);
+        tracing::info!("Batch failed {}", error);

         self.update_task(&mut wtxn, &task)
             .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
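The only change in this hunk is the severity of the batch-failure log: `tracing::error!` on the v1.11.3 side becomes `tracing::info!` on control-be. With the tracing crate the macro name is the level, so the same event is kept or dropped purely by subscriber configuration. A small sketch (assuming the tracing and tracing-subscriber crates are available):

use tracing::{error, info};

fn report_batch_failure(err: &str) {
    // Identical message, different severities: a subscriber filtering at
    // ERROR keeps only the first line, one filtering at INFO keeps both.
    error!("Batch failed {}", err);
    info!("Batch failed {}", err);
}

fn main() {
    // The default fmt subscriber emits INFO and above.
    tracing_subscriber::fmt().init();
    report_batch_failure("index not found");
}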
@@ -1477,7 +1477,7 @@ impl IndexScheduler {
             .map(
                 |IndexEmbeddingConfig {
                      name,
-                     config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
+                     config: milli::vector::EmbeddingConfig { embedder_options, prompt },
                      ..
                  }| {
                     let prompt =
@@ -1486,10 +1486,7 @@ impl IndexScheduler {
                     {
                         let embedders = self.embedders.read().unwrap();
                         if let Some(embedder) = embedders.get(&embedder_options) {
-                            return Ok((
-                                name,
-                                (embedder.clone(), prompt, quantized.unwrap_or_default()),
-                            ));
+                            return Ok((name, (embedder.clone(), prompt)));
                         }
                     }

@@ -1503,7 +1500,7 @@ impl IndexScheduler {
                         let mut embedders = self.embedders.write().unwrap();
                         embedders.insert(embedder_options, embedder.clone());
                     }
-                    Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
+                    Ok((name, (embedder, prompt)))
                 },
             )
             .collect();
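The three hunks above all sit inside the scheduler's embedder cache: a lookup under a shared read lock and, on a miss, construction plus insertion under a write lock; the only functional change is that the cached tuple loses its quantized flag on the control-be side. A minimal sketch of that read-then-write caching pattern, with hypothetical Options/Embedder types standing in for milli's EmbedderOptions and Embedder:

use std::collections::HashMap;
use std::sync::{Arc, RwLock};

// Hypothetical stand-ins for milli's EmbedderOptions / Embedder.
#[derive(Clone, PartialEq, Eq, Hash)]
struct Options(String);
struct Embedder;

#[derive(Default)]
struct EmbedderCache {
    embedders: RwLock<HashMap<Options, Arc<Embedder>>>,
}

impl EmbedderCache {
    fn get_or_create(&self, options: &Options) -> Arc<Embedder> {
        // Fast path: a shared read lock, so concurrent lookups don't block
        // each other; the guard is dropped before any write is attempted.
        {
            let embedders = self.embedders.read().unwrap();
            if let Some(embedder) = embedders.get(options) {
                return Arc::clone(embedder);
            }
        }
        // Slow path: take the write lock; `entry` re-checks the key so a
        // racing thread's insertion is reused instead of overwritten.
        let mut embedders = self.embedders.write().unwrap();
        Arc::clone(
            embedders
                .entry(options.clone())
                .or_insert_with(|| Arc::new(Embedder)),
        )
    }
}

fn main() {
    let cache = EmbedderCache::default();
    let a = cache.get_or_create(&Options("hf".into()));
    let b = cache.get_or_create(&Options("hf".into()));
    assert!(Arc::ptr_eq(&a, &b)); // the second call hits the cache
}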
@@ -5200,11 +5197,10 @@ mod tests {
             let simple_hf_name = name.clone();

             let configs = index_scheduler.embedders(configs).unwrap();
-            let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
-            let beagle_embed =
-                hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap();
-            let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap();
-            let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap();
+            let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap();
+            let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
+            let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
+            let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
             (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
         };

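The test edits track a signature change in the embedder API: on the v1.11.3 side `embed_one` takes an extra trailing argument (passed as `None` here) that the control-be side lacks. The diff does not show what that argument means, so the following is only a hypothetical illustration of threading an optional parameter through such a call:

struct Embedder;

impl Embedder {
    // Hypothetical older signature: text only.
    fn embed_one_old(&self, text: String) -> Vec<f32> {
        let _ = text;
        vec![0.1; 4] // dummy embedding
    }

    // Hypothetical newer signature: an extra Option<_> parameter; existing
    // call sites simply append `None` to keep the previous behaviour, which
    // is exactly the shape of the edits in the hunk above.
    fn embed_one(&self, text: String, extra: Option<u64>) -> Vec<f32> {
        let _ = extra;
        self.embed_one_old(text)
    }
}

fn main() {
    let e = Embedder;
    let v_old = e.embed_one_old("Intel the beagle best doggo".into());
    let v_new = e.embed_one("Intel the beagle best doggo".into(), None);
    assert_eq!(v_old, v_new);
}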
@@ -5523,7 +5519,6 @@ mod tests {
                     400,
                 ),
             },
-            quantized: None,
         },
         user_provided: RoaringBitmap<[1, 2]>,
     },
@@ -5536,8 +5531,28 @@ mod tests {

         // the document with the id 3 should keep its original embedding
         let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
-        let embeddings = index.embeddings(&rtxn, docid).unwrap();
-        let embeddings = &embeddings["my_doggo_embedder"];
+        let mut embeddings = Vec::new();
+
+        'vectors: for i in 0..=u8::MAX {
+            let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
+                .map(Some)
+                .or_else(|e| match e {
+                    arroy::Error::MissingMetadata(_) => Ok(None),
+                    e => Err(e),
+                })
+                .transpose();
+
+            let Some(reader) = reader else {
+                break 'vectors;
+            };
+
+            let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap();
+            if let Some(embedding) = embedding {
+                embeddings.push(embedding)
+            } else {
+                break 'vectors;
+            }
+        }

         snapshot!(embeddings.len(), @"1");
         assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
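One idiom in the replacement test code is worth unpacking: `.map(Some).or_else(...).transpose()` converts a `Result<T, E>` into an `Option<Result<T, E>>`, turning exactly one error case (a missing arroy store) into `None` while letting every other error through. A self-contained sketch of the same conversion with a hypothetical store type:

#[derive(Debug)]
enum StoreError {
    MissingMetadata,
    Corrupt,
}

// Hypothetical opener: store 0 exists, store 1 is absent, store 2 is broken.
fn open_store(id: u16) -> Result<String, StoreError> {
    match id {
        0 => Ok("store-0".to_string()),
        1 => Err(StoreError::MissingMetadata),
        _ => Err(StoreError::Corrupt),
    }
}

fn main() {
    for id in 0..3u16 {
        // Result<T, E> -> Result<Option<T>, E> -> Option<Result<T, E>>:
        // MissingMetadata becomes None ("no more stores"), Corrupt survives
        // as Some(Err(..)) and would still be propagated by the caller.
        let store = open_store(id)
            .map(Some)
            .or_else(|e| match e {
                StoreError::MissingMetadata => Ok(None),
                e => Err(e),
            })
            .transpose();
        println!("{id}: {store:?}");
    }
}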
@@ -5722,7 +5737,6 @@ mod tests {
                     400,
                 ),
             },
-            quantized: None,
         },
         user_provided: RoaringBitmap<[0]>,
     },
@@ -5766,7 +5780,6 @@ mod tests {
                     400,
                 ),
             },
-            quantized: None,
         },
         user_provided: RoaringBitmap<[]>,
     },
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
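Throughout the snapshot hunks above, every settings field prints as either `Set(...)` or `NotSet`. As a rough illustration of why the snapshots make that distinction (this is a simplified stand-in, not milli's actual `Setting` type, which also carries a `Reset` variant and serde integration):

```rust
// Illustrative stand-in only: shows why a partial settings update prints
// `dimensions: Set(384), url: NotSet` in the task snapshots above.
#[derive(Debug)]
enum Setting<T> {
    Set(T), // the user explicitly provided a value
    NotSet, // the field was absent from the payload, so keep the old value
}

#[derive(Debug)]
struct EmbeddingSettings {
    dimensions: Setting<usize>,
    url: Setting<String>,
}

fn main() {
    let partial_update = EmbeddingSettings {
        dimensions: Setting::Set(384),
        url: Setting::NotSet,
    };
    // Debug output mirrors the style of the snapshots.
    println!("{partial_update:?}");
}
```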
@@ -66,8 +66,3 @@ khmer = ["milli/khmer"]
 vietnamese = ["milli/vietnamese"]
 # force swedish character recomposition
 swedish-recomposition = ["milli/swedish-recomposition"]
-# allow german tokenization
-german = ["milli/german"]
-# allow turkish normalization
-turkish = ["milli/turkish"]
-
@@ -395,10 +395,7 @@ impl ErrorCode for milli::Error {
             | UserError::InvalidSettingsDimensions { .. }
             | UserError::InvalidUrl { .. }
             | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
-            | UserError::InvalidPrompt(_)
-            | UserError::InvalidDisableBinaryQuantization { .. } => {
-                Code::InvalidSettingsEmbedders
-            }
+            | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
             UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
             UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
             UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
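The hunk above drops the `InvalidDisableBinaryQuantization` arm and collapses the remaining variants back into a single or-pattern. A minimal sketch of that match style, with stand-in enums rather than the real milli error types:

```rust
// Stand-in enums, not the real milli types: the point is only the
// or-pattern mapping several error variants onto one public error code.
enum UserError {
    InvalidUrl,
    InvalidPrompt,
    TooManyEmbedders,
    NoPrimaryKeyCandidateFound,
}

#[derive(Debug)]
enum Code {
    InvalidSettingsEmbedders,
    IndexPrimaryKeyNoCandidateFound,
}

fn error_code(err: &UserError) -> Code {
    match err {
        // One arm covers every embedder-settings failure.
        UserError::InvalidUrl | UserError::InvalidPrompt | UserError::TooManyEmbedders => {
            Code::InvalidSettingsEmbedders
        }
        UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
    }
}

fn main() {
    println!("{:?}", error_code(&UserError::InvalidPrompt));
}
```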
@@ -39,14 +39,12 @@ macro_rules! make_locale {
         pub enum Locale {
             $($iso_639_1,)+
             $($iso_639_3,)+
-            Cmn,
         }
 
         impl From<milli::tokenizer::Language> for Locale {
             fn from(other: milli::tokenizer::Language) -> Locale {
                 match other {
                     $(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+
-                    milli::tokenizer::Language::Cmn => Locale::Cmn,
                 }
             }
         }
@@ -56,7 +54,6 @@ macro_rules! make_locale {
                 match other {
                     $(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+
                     $(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+
-                    Locale::Cmn => milli::tokenizer::Language::Cmn,
                 }
             }
         }
@@ -68,7 +65,6 @@ macro_rules! make_locale {
                 let locale = match s {
                     $($iso_639_1_str => Locale::$iso_639_1,)+
                     $($iso_639_3_str => Locale::$iso_639_3,)+
-                    "cmn" => Locale::Cmn,
                     _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }),
                 };
 
@@ -83,9 +79,8 @@ macro_rules! make_locale {
 
         impl std::fmt::Display for LocaleFormatError {
             fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-                let mut valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+,"cmn"];
-                valid_locales.sort_by(|left, right| left.len().cmp(&right.len()).then(left.cmp(right)));
-                write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales.join(", "))
+                let valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+].join(", ");
+                write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales)
             }
         }
 
@@ -104,6 +99,7 @@ make_locale!(
     (Bg, "bg") => (Bul, "bul"),
     (Ca, "ca") => (Cat, "cat"),
     (Cs, "cs") => (Ces, "ces"),
+    (Zh, "zh") => (Cmn, "cmn"),
     (Da, "da") => (Dan, "dan"),
     (De, "de") => (Deu, "deu"),
     (El, "el") => (Ell, "ell"),
@@ -161,6 +157,5 @@ make_locale!(
     (Uz, "uz") => (Uzb, "uzb"),
     (Vi, "vi") => (Vie, "vie"),
     (Yi, "yi") => (Yid, "yid"),
-    (Zh, "zh") => (Zho, "zho"),
     (Zu, "zu") => (Zul, "zul"),
 );
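Taken together, the locale hunks above remove the hand-written `Cmn` special case and route Chinese through the macro's ordinary pairs via `(Zh, "zh") => (Cmn, "cmn")`. A heavily reduced sketch of how such a macro expands (the real `make_locale!` also generates `From` conversions, serde support, and the error type shown above):

```rust
// Reduced sketch: each (ISO 639-1, ISO 639-3) pair expands into enum
// variants and match arms, so no language needs a dedicated branch.
macro_rules! make_locale {
    ($(($one:ident, $one_str:literal) => ($three:ident, $three_str:literal)),+ $(,)?) => {
        #[derive(Debug, Clone, Copy, PartialEq)]
        pub enum Locale {
            $($one,)+
            $($three,)+
        }

        impl std::str::FromStr for Locale {
            type Err = String;

            fn from_str(s: &str) -> Result<Self, Self::Err> {
                match s {
                    $($one_str => Ok(Locale::$one),)+
                    $($three_str => Ok(Locale::$three),)+
                    _ => Err(format!("unsupported locale `{s}`")),
                }
            }
        }
    };
}

// With Chinese expressed as a regular pair, no dedicated `Cmn` arms remain.
make_locale!(
    (De, "de") => (Deu, "deu"),
    (Zh, "zh") => (Cmn, "cmn"),
);

fn main() {
    assert_eq!("zh".parse::<Locale>(), Ok(Locale::Zh));
    assert_eq!("cmn".parse::<Locale>(), Ok(Locale::Cmn));
}
```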
@@ -75,7 +75,7 @@ reqwest = { version = "0.12.5", features = [
 rustls = { version = "0.23.11", features = ["ring"], default-features = false }
 rustls-pki-types = { version = "1.7.0", features = ["alloc"] }
 rustls-pemfile = "2.1.2"
-segment = { version = "0.2.4" }
+segment = { version = "0.2.4", optional = true }
 serde = { version = "1.0.204", features = ["derive"] }
 serde_json = { version = "1.0.120", features = ["preserve_order"] }
 sha2 = "0.10.8"
@@ -104,7 +104,6 @@ tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
 tracing-actix-web = "0.7.11"
 build-info = { version = "1.7.0", path = "../build-info" }
 roaring = "0.10.2"
-mopa-maintained = "0.2.3"
 
 [dev-dependencies]
 actix-rt = "2.10.0"
@@ -132,7 +131,8 @@ tempfile = { version = "3.10.1", optional = true }
 zip = { version = "2.1.3", optional = true }
 
 [features]
-default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
+default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
+analytics = ["segment"]
 mini-dashboard = [
 "static-files",
 "anyhow",
@@ -153,9 +153,7 @@ greek = ["meilisearch-types/greek"]
 khmer = ["meilisearch-types/khmer"]
 vietnamese = ["meilisearch-types/vietnamese"]
 swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
-german = ["meilisearch-types/german"]
-turkish = ["meilisearch-types/turkish"]
 
 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.15/build.zip"
-sha1 = "d057600b4a839a2e0c0be7a372cd1b2683f3ca7e"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"
+sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe"
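With `segment` now optional and wired to a new `analytics` cargo feature (kept in the default set), the analytics code paths can be compiled out entirely. A hedged sketch of the compile-time switch this enables on the Rust side; the function and event names are illustrative, not Meilisearch's actual call sites:

```rust
// Illustrative only: demonstrates the cfg mechanism the new
// `analytics = ["segment"]` feature makes possible.
#[cfg(feature = "analytics")]
fn track(event: &str) {
    // With the feature on, this would go through the real `segment` client.
    println!("tracking: {event}");
}

#[cfg(not(feature = "analytics"))]
fn track(_event: &str) {
    // With the feature off, `segment` is never compiled or linked at all.
}

fn main() {
    // Call sites stay identical either way.
    track("Documents Searched GET");
}
```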
meilisearch/src/analytics/mock_analytics.rs (new file, 109 lines)
@@ -0,0 +1,109 @@
+use std::any::Any;
+use std::sync::Arc;
+
+use actix_web::HttpRequest;
+use meilisearch_types::InstanceUid;
+use serde_json::Value;
+
+use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
+use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
+use crate::Opt;
+
+pub struct MockAnalytics {
+    instance_uid: Option<InstanceUid>,
+}
+
+#[derive(Default)]
+pub struct SearchAggregator;
+
+#[allow(dead_code)]
+impl SearchAggregator {
+    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
+        Self
+    }
+
+    pub fn succeed(&mut self, _: &dyn Any) {}
+}
+
+#[derive(Default)]
+pub struct SimilarAggregator;
+
+#[allow(dead_code)]
+impl SimilarAggregator {
+    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
+        Self
+    }
+
+    pub fn succeed(&mut self, _: &dyn Any) {}
+}
+
+#[derive(Default)]
+pub struct MultiSearchAggregator;
+
+#[allow(dead_code)]
+impl MultiSearchAggregator {
+    pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self {
+        Self
+    }
+
+    pub fn succeed(&mut self) {}
+}
+
+#[derive(Default)]
+pub struct FacetSearchAggregator;
+
+#[allow(dead_code)]
+impl FacetSearchAggregator {
+    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
+        Self
+    }
+
+    pub fn succeed(&mut self, _: &dyn Any) {}
+}
+
+impl MockAnalytics {
+    #[allow(clippy::new_ret_no_self)]
+    pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
+        let instance_uid = find_user_id(&opt.db_path);
+        Arc::new(Self { instance_uid })
+    }
+}
+
+impl Analytics for MockAnalytics {
+    fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
+        self.instance_uid.as_ref()
+    }
+
+    // These methods are noop and should be optimized out
+    fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
+    fn get_search(&self, _aggregate: super::SearchAggregator) {}
+    fn post_search(&self, _aggregate: super::SearchAggregator) {}
+    fn get_similar(&self, _aggregate: super::SimilarAggregator) {}
+    fn post_similar(&self, _aggregate: super::SimilarAggregator) {}
+    fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
+    fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
+    fn add_documents(
+        &self,
+        _documents_query: &UpdateDocumentsQuery,
+        _index_creation: bool,
+        _request: &HttpRequest,
+    ) {
+    }
+    fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {}
+    fn update_documents(
+        &self,
+        _documents_query: &UpdateDocumentsQuery,
+        _index_creation: bool,
+        _request: &HttpRequest,
+    ) {
+    }
+    fn update_documents_by_function(
+        &self,
+        _documents_query: &DocumentEditionByFunction,
+        _index_creation: bool,
+        _request: &HttpRequest,
+    ) {
+    }
+    fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
+    fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
+}
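The new module is essentially a null object: every `Analytics` method gets an empty body, so the rest of the server can call analytics unconditionally and a build without the feature pays nothing. A reduced sketch of the pattern (a simplified trait, not the full signature set above):

```rust
use std::sync::Arc;

// Reduced trait, mirroring only the shape of the real `Analytics` trait.
trait Analytics: Send + Sync {
    fn publish(&self, event_name: String);
}

struct MockAnalytics;

impl Analytics for MockAnalytics {
    // Empty body: the optimizer can erase the call entirely.
    fn publish(&self, _event_name: String) {}
}

fn main() {
    let analytics: Arc<dyn Analytics> = Arc::new(MockAnalytics);
    // The caller neither knows nor cares whether analytics is real.
    analytics.publish("Documents Searched POST".into());
}
```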
@@ -1,45 +1,44 @@
-pub mod segment_analytics;
+mod mock_analytics;
+#[cfg(feature = "analytics")]
+mod segment_analytics;
 
 use std::fs;
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
-use std::sync::Arc;
 
 use actix_web::HttpRequest;
-use index_scheduler::IndexScheduler;
-use meilisearch_auth::AuthController;
 use meilisearch_types::InstanceUid;
-use mopa::mopafy;
+pub use mock_analytics::MockAnalytics;
 use once_cell::sync::Lazy;
 use platform_dirs::AppDirs;
+use serde_json::Value;
+
+use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
+
+// if the analytics feature is disabled
+// the `SegmentAnalytics` point to the mock instead of the real analytics
+#[cfg(not(feature = "analytics"))]
+pub type SegmentAnalytics = mock_analytics::MockAnalytics;
+#[cfg(not(feature = "analytics"))]
+pub type SearchAggregator = mock_analytics::SearchAggregator;
+#[cfg(not(feature = "analytics"))]
+pub type SimilarAggregator = mock_analytics::SimilarAggregator;
+#[cfg(not(feature = "analytics"))]
+pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
+#[cfg(not(feature = "analytics"))]
+pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
 
 // if the feature analytics is enabled we use the real analytics
+#[cfg(feature = "analytics")]
 pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
-use crate::Opt;
-
-/// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name.
-#[macro_export]
-macro_rules! empty_analytics {
-    ($struct_name:ident, $event_name:literal) => {
-        #[derive(Default)]
-        struct $struct_name {}
-
-        impl $crate::analytics::Aggregate for $struct_name {
-            fn event_name(&self) -> &'static str {
-                $event_name
-            }
-
-            fn aggregate(self: Box<Self>, _other: Box<Self>) -> Box<Self> {
-                self
-            }
-
-            fn into_event(self: Box<Self>) -> serde_json::Value {
-                serde_json::json!({})
-            }
-        }
-    };
-}
+#[cfg(feature = "analytics")]
+pub type SearchAggregator = segment_analytics::SearchAggregator;
+#[cfg(feature = "analytics")]
+pub type SimilarAggregator = segment_analytics::SimilarAggregator;
+#[cfg(feature = "analytics")]
+pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
+#[cfg(feature = "analytics")]
+pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
 
 /// The Meilisearch config dir:
 /// `~/.config/Meilisearch` on *NIX or *BSD.
@@ -79,88 +78,60 @@ pub enum DocumentFetchKind {
|
|||||||
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
|
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
|
||||||
}
|
}
|
||||||
|
|
||||||
/// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
|
pub trait Analytics: Sync + Send {
|
||||||
pub trait Aggregate: 'static + mopa::Any + Send {
|
fn instance_uid(&self) -> Option<&InstanceUid>;
|
||||||
/// The name of the event that will be sent to segment.
|
|
||||||
fn event_name(&self) -> &'static str;
|
|
||||||
|
|
||||||
/// Will be called every time an event has been used twice before segment flushed its buffer.
|
|
||||||
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self>
|
|
||||||
where
|
|
||||||
Self: Sized;
|
|
||||||
|
|
||||||
/// Converts your structure to the final event that'll be sent to segment.
|
|
||||||
fn into_event(self: Box<Self>) -> serde_json::Value;
|
|
||||||
}
|
|
||||||
|
|
||||||
mopafy!(Aggregate);
|
|
||||||
|
|
||||||
/// Helper trait to define multiple aggregates with the same content but a different name.
|
|
||||||
/// Commonly used when you must aggregate a search with POST or with GET, for example.
|
|
||||||
pub trait AggregateMethod: 'static + Default + Send {
|
|
||||||
fn event_name() -> &'static str;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A macro used to quickly define multiple aggregate method with their name
|
|
||||||
/// Usage:
|
|
||||||
/// ```rust
|
|
||||||
/// use meilisearch::aggregate_methods;
|
|
||||||
///
|
|
||||||
/// aggregate_methods!(
|
|
||||||
/// SearchGET => "Documents Searched GET",
|
|
||||||
/// SearchPOST => "Documents Searched POST",
|
|
||||||
/// );
|
|
||||||
/// ```
|
|
||||||
#[macro_export]
|
|
||||||
macro_rules! aggregate_methods {
|
|
||||||
($method:ident => $event_name:literal) => {
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct $method {}
|
|
||||||
|
|
||||||
impl $crate::analytics::AggregateMethod for $method {
|
|
||||||
fn event_name() -> &'static str {
|
|
||||||
$event_name
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
($($method:ident => $event_name:literal,)+) => {
|
|
||||||
$(
|
|
||||||
aggregate_methods!($method => $event_name);
|
|
||||||
)+
|
|
||||||
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct Analytics {
|
|
||||||
segment: Option<Arc<SegmentAnalytics>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Analytics {
|
|
||||||
pub async fn new(
|
|
||||||
opt: &Opt,
|
|
||||||
index_scheduler: Arc<IndexScheduler>,
|
|
||||||
auth_controller: Arc<AuthController>,
|
|
||||||
) -> Self {
|
|
||||||
if opt.no_analytics {
|
|
||||||
Self { segment: None }
|
|
||||||
} else {
|
|
||||||
Self { segment: SegmentAnalytics::new(opt, index_scheduler, auth_controller).await }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn no_analytics() -> Self {
|
|
||||||
Self { segment: None }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn instance_uid(&self) -> Option<&InstanceUid> {
|
|
||||||
self.segment.as_ref().map(|segment| segment.instance_uid.as_ref())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The method used to publish most analytics that do not need to be batched every hours
|
/// The method used to publish most analytics that do not need to be batched every hours
|
||||||
pub fn publish<T: Aggregate>(&self, event: T, request: &HttpRequest) {
|
fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);
|
||||||
if let Some(ref segment) = self.segment {
|
|
||||||
let _ = segment.sender.try_send(segment_analytics::Message::new(event, request));
|
/// This method should be called to aggregate a get search
|
||||||
}
|
fn get_search(&self, aggregate: SearchAggregator);
|
||||||
}
|
|
||||||
|
/// This method should be called to aggregate a post search
|
||||||
|
fn post_search(&self, aggregate: SearchAggregator);
|
||||||
|
|
||||||
|
/// This method should be called to aggregate a get similar request
|
||||||
|
fn get_similar(&self, aggregate: SimilarAggregator);
|
||||||
|
|
||||||
|
/// This method should be called to aggregate a post similar request
|
||||||
|
fn post_similar(&self, aggregate: SimilarAggregator);
|
||||||
|
|
||||||
|
/// This method should be called to aggregate a post array of searches
|
||||||
|
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
|
||||||
|
|
||||||
|
/// This method should be called to aggregate post facet values searches
|
||||||
|
fn post_facet_search(&self, aggregate: FacetSearchAggregator);
|
||||||
|
|
||||||
|
// this method should be called to aggregate an add documents request
|
||||||
|
fn add_documents(
|
||||||
|
&self,
|
||||||
|
documents_query: &UpdateDocumentsQuery,
|
||||||
|
index_creation: bool,
|
||||||
|
request: &HttpRequest,
|
||||||
|
);
|
||||||
|
|
||||||
|
// this method should be called to aggregate a fetch documents request
|
||||||
|
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
|
||||||
|
|
||||||
|
// this method should be called to aggregate a fetch documents request
|
||||||
|
fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
|
||||||
|
|
||||||
|
// this method should be called to aggregate a add documents request
|
||||||
|
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);
|
||||||
|
|
||||||
|
// this method should be called to batch an update documents request
|
||||||
|
fn update_documents(
|
||||||
|
&self,
|
||||||
|
documents_query: &UpdateDocumentsQuery,
|
||||||
|
index_creation: bool,
|
||||||
|
request: &HttpRequest,
|
||||||
|
);
|
||||||
|
|
||||||
|
// this method should be called to batch an update documents by function request
|
||||||
|
fn update_documents_by_function(
|
||||||
|
&self,
|
||||||
|
documents_query: &DocumentEditionByFunction,
|
||||||
|
index_creation: bool,
|
||||||
|
request: &HttpRequest,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
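The `Aggregate` trait deleted in this hunk is the heart of the v1.11.3 design: each event type knows how to merge with a later event of the same type before the buffer is flushed to segment. A simplified, compilable sketch of that contract (statically typed; the real trait also requires `mopa::Any` for downcasting, omitted here):

```rust
// Simplified version of the removed trait: an event knows its name, how to
// merge with another event of the same type, and how to serialize itself.
trait Aggregate: 'static + Send {
    fn event_name(&self) -> &'static str;
    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self>
    where
        Self: Sized;
    fn into_event(self: Box<Self>) -> serde_json::Value;
}

#[derive(Default)]
struct SearchCount {
    total_received: usize,
}

impl Aggregate for SearchCount {
    fn event_name(&self) -> &'static str {
        "Documents Searched"
    }

    // Two buffered events of the same kind collapse into one before sending.
    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
        Box::new(SearchCount { total_received: self.total_received + new.total_received })
    }

    fn into_event(self: Box<Self>) -> serde_json::Value {
        serde_json::json!({ "total_received": self.total_received })
    }
}

fn main() {
    let a = Box::new(SearchCount { total_received: 1 });
    let b = Box::new(SearchCount { total_received: 1 });
    let merged = a.aggregate(b);
    assert_eq!(merged.into_event()["total_received"], 2);
}
```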
File diff suppressed because it is too large
@@ -72,7 +72,7 @@ pub enum MeilisearchHttpError {
     DocumentFormat(#[from] DocumentFormatError),
     #[error(transparent)]
     Join(#[from] JoinError),
-    #[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
+    #[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
     MissingSearchHybrid,
 }
 
@@ -120,7 +120,7 @@ pub fn create_app(
     search_queue: Data<SearchQueue>,
     opt: Opt,
     logs: (LogRouteHandle, LogStderrHandle),
-    analytics: Data<Analytics>,
+    analytics: Arc<dyn Analytics>,
     enable_dashboard: bool,
 ) -> actix_web::App<
     impl ServiceFactory<
@@ -473,14 +473,14 @@ pub fn configure_data(
     search_queue: Data<SearchQueue>,
     opt: &Opt,
     (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
-    analytics: Data<Analytics>,
+    analytics: Arc<dyn Analytics>,
 ) {
     let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
     config
         .app_data(index_scheduler)
         .app_data(auth)
         .app_data(search_queue)
-        .app_data(analytics)
+        .app_data(web::Data::from(analytics))
         .app_data(web::Data::new(logs_route))
         .app_data(web::Data::new(logs_stderr))
         .app_data(web::Data::new(opt.clone()))
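The `web::Data::from(analytics)` change above is load-bearing: `Data::new` needs a sized value, while an `Arc<dyn Trait>` has to be wrapped through `From<Arc<T>>` so handlers can extract `web::Data<dyn Analytics>`. A minimal sketch of the pattern, assuming actix-web 4 as a dependency:

```rust
use std::sync::Arc;

use actix_web::web;

trait Analytics: Send + Sync {
    fn instance_uid(&self) -> Option<&str> {
        None
    }
}

struct MockAnalytics;
impl Analytics for MockAnalytics {}

fn main() {
    let analytics: Arc<dyn Analytics> = Arc::new(MockAnalytics);
    // `Data::new` would require a sized type; `Data::from` reuses the Arc,
    // letting route handlers take `web::Data<dyn Analytics>` as an extractor.
    let data: web::Data<dyn Analytics> = web::Data::from(analytics);
    assert!(data.instance_uid().is_none());
}
```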
@@ -5,7 +5,6 @@ use std::path::PathBuf;
 use std::str::FromStr;
 use std::sync::Arc;
 use std::thread::available_parallelism;
-use std::time::Duration;
 
 use actix_web::http::KeepAlive;
 use actix_web::web::Data;
@@ -124,12 +123,19 @@ async fn try_main() -> anyhow::Result<()> {
 
     let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
 
-    let analytics =
-        analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await;
+    #[cfg(all(not(debug_assertions), feature = "analytics"))]
+    let analytics = if !opt.no_analytics {
+        analytics::SegmentAnalytics::new(&opt, index_scheduler.clone(), auth_controller.clone())
+            .await
+    } else {
+        analytics::MockAnalytics::new(&opt)
+    };
+    #[cfg(any(debug_assertions, not(feature = "analytics")))]
+    let analytics = analytics::MockAnalytics::new(&opt);
 
     print_launch_resume(&opt, analytics.clone(), config_read_from);
 
-    run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?;
+    run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?;
 
     Ok(())
 }
@@ -139,23 +145,16 @@ async fn run_http(
     auth_controller: Arc<AuthController>,
     opt: Opt,
     logs: (LogRouteHandle, LogStderrHandle),
-    analytics: Arc<Analytics>,
+    analytics: Arc<dyn Analytics>,
 ) -> anyhow::Result<()> {
     let enable_dashboard = &opt.env == "development";
     let opt_clone = opt.clone();
     let index_scheduler = Data::from(index_scheduler);
     let auth_controller = Data::from(auth_controller);
-    let analytics = Data::from(analytics);
     let search_queue = SearchQueue::new(
         opt.experimental_search_queue_size,
-        available_parallelism()
-            .unwrap_or(NonZeroUsize::new(2).unwrap())
-            .checked_mul(opt.experimental_nb_searches_per_core)
-            .unwrap_or(NonZeroUsize::MAX),
-    )
-    .with_time_to_abort(Duration::from_secs(
-        usize::from(opt.experimental_drop_search_after) as u64
-    ));
+        available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
+    );
     let search_queue = Data::new(search_queue);
 
     let http_server = HttpServer::new(move || {
@@ -181,7 +180,11 @@ async fn run_http(
     Ok(())
 }
 
-pub fn print_launch_resume(opt: &Opt, analytics: Analytics, config_read_from: Option<PathBuf>) {
+pub fn print_launch_resume(
+    opt: &Opt,
+    analytics: Arc<dyn Analytics>,
+    config_read_from: Option<PathBuf>,
+) {
     let build_info = build_info::BuildInfo::from_build();
 
     let protocol =
@@ -223,6 +226,7 @@ pub fn print_launch_resume(opt: &Opt, analytics: Analytics, config_read_from: Op
         eprintln!("Prototype:\t\t{:?}", prototype);
     }
 
+    #[cfg(all(not(debug_assertions), feature = "analytics"))]
     {
         if !opt.no_analytics {
             eprintln!(
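The restored startup logic in the hunk above relies on complementary `cfg` predicates so that exactly one `let analytics = …` binding survives compilation: release builds with the feature get the real client, everything else gets the mock. A reduced sketch of the pattern (the `analytics` feature name follows the diff; any Cargo feature behaves the same way):

```rust
fn main() {
    // Exactly one binding is compiled: the two predicates are complements.
    #[cfg(all(not(debug_assertions), feature = "analytics"))]
    let analytics = "segment";
    #[cfg(any(debug_assertions, not(feature = "analytics")))]
    let analytics = "mock";

    println!("analytics backend: {analytics}");
}
```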
@@ -2,7 +2,7 @@ use std::env::VarError;
 use std::ffi::OsStr;
 use std::fmt::Display;
 use std::io::{BufReader, Read};
-use std::num::{NonZeroUsize, ParseIntError};
+use std::num::ParseIntError;
 use std::ops::Deref;
 use std::path::PathBuf;
 use std::str::FromStr;
@@ -29,6 +29,7 @@ const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
 const MEILI_ENV: &str = "MEILI_ENV";
 const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
 const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
+#[cfg(feature = "analytics")]
 const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
 const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
 const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH";
@@ -54,8 +55,6 @@ const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LO
 const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
 const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
 const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
-const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
-const MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE: &str = "MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE";
 const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
     "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
 const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
@@ -209,6 +208,7 @@ pub struct Opt {
     /// Meilisearch automatically collects data from all instances that do not opt out using this flag.
     /// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted
     /// at any time.
+    #[cfg(feature = "analytics")]
     #[serde(default)] // we can't send true
     #[clap(long, env = MEILI_NO_ANALYTICS)]
     pub no_analytics: bool,
@@ -357,26 +357,10 @@ pub struct Opt {
     /// Lets you customize the size of the search queue. Meilisearch processes your search requests as fast as possible but once the
     /// queue is full it starts returning HTTP 503, Service Unavailable.
     /// The default value is 1000.
-    #[clap(long, env = MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, default_value_t = default_experimental_search_queue_size())]
-    #[serde(default = "default_experimental_search_queue_size")]
+    #[clap(long, env = MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, default_value_t = 1000)]
+    #[serde(default)]
     pub experimental_search_queue_size: usize,
 
-    /// Experimental drop search after. For more information, see: <https://github.com/orgs/meilisearch/discussions/783>
-    ///
-    /// Let you customize after how many seconds Meilisearch should consider a search request irrelevant and drop it.
-    /// The default value is 60.
-    #[clap(long, env = MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER, default_value_t = default_drop_search_after())]
-    #[serde(default = "default_drop_search_after")]
-    pub experimental_drop_search_after: NonZeroUsize,
-
-    /// Experimental number of searches per core. For more information, see: <https://github.com/orgs/meilisearch/discussions/784>
-    ///
-    /// Lets you customize how many search requests can run on each core concurrently.
-    /// The default value is 4.
-    #[clap(long, env = MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE, default_value_t = default_nb_searches_per_core())]
-    #[serde(default = "default_nb_searches_per_core")]
-    pub experimental_nb_searches_per_core: NonZeroUsize,
-
     /// Experimental logs mode feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/723>
     ///
     /// Change the mode of the logs on the console.
@@ -423,6 +407,7 @@ pub struct Opt {
 
 impl Opt {
     /// Whether analytics should be enabled or not.
+    #[cfg(all(not(debug_assertions), feature = "analytics"))]
     pub fn analytics(&self) -> bool {
         !self.no_analytics
     }
@@ -502,12 +487,11 @@ impl Opt {
             ignore_missing_dump: _,
             ignore_dump_if_db_exists: _,
             config_file_path: _,
+            #[cfg(feature = "analytics")]
             no_analytics,
             experimental_contains_filter,
             experimental_enable_metrics,
             experimental_search_queue_size,
-            experimental_drop_search_after,
-            experimental_nb_searches_per_core,
             experimental_logs_mode,
             experimental_enable_logs_route,
             experimental_replication_parameters,
@@ -529,7 +513,10 @@ impl Opt {
             );
         }
 
+        #[cfg(feature = "analytics")]
+        {
             export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
+        }
         export_to_env_if_not_present(
             MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
             http_payload_size_limit.to_string(),
@@ -572,14 +559,6 @@ impl Opt {
             MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE,
             experimental_search_queue_size.to_string(),
         );
-        export_to_env_if_not_present(
-            MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER,
-            experimental_drop_search_after.to_string(),
-        );
-        export_to_env_if_not_present(
-            MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE,
-            experimental_nb_searches_per_core.to_string(),
-        );
         export_to_env_if_not_present(
             MEILI_EXPERIMENTAL_LOGS_MODE,
             experimental_logs_mode.to_string(),
@@ -911,18 +890,6 @@ fn default_dump_dir() -> PathBuf {
     PathBuf::from(DEFAULT_DUMP_DIR)
 }
 
-fn default_experimental_search_queue_size() -> usize {
-    1000
-}
-
-fn default_drop_search_after() -> NonZeroUsize {
-    NonZeroUsize::new(60).unwrap()
-}
-
-fn default_nb_searches_per_core() -> NonZeroUsize {
-    NonZeroUsize::new(4).unwrap()
-}
-
 /// Indicates if a snapshot was scheduled, and if yes with which interval.
 #[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)]
 pub enum ScheduleSnapshot {
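The removed options illustrate the usual trick for keeping CLI and config-file defaults in lockstep: a single `fn default_*()` referenced from both clap's `default_value_t` and serde's `#[serde(default = "…")]`. A sketch under the assumption that clap 4 (with the derive feature) and serde are dependencies; the field name mirrors the diff but the struct is illustrative only:

```rust
use std::num::NonZeroUsize;

use clap::Parser;
use serde::Deserialize;

// One function feeds both the CLI default and the config-file default, so the
// two entry points cannot drift apart.
fn default_drop_search_after() -> NonZeroUsize {
    NonZeroUsize::new(60).unwrap()
}

#[derive(Debug, Parser, Deserialize)]
struct Opt {
    #[clap(long, default_value_t = default_drop_search_after())]
    #[serde(default = "default_drop_search_after")]
    drop_search_after: NonZeroUsize,
}

fn main() {
    // No flag given on the command line: the shared default kicks in.
    let opt = Opt::parse_from(["demo"]);
    assert_eq!(opt.drop_search_after.get(), 60);
}
```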
@@ -4,6 +4,7 @@ use index_scheduler::IndexScheduler;
 use meilisearch_auth::AuthController;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::tasks::KindWithContent;
+use serde_json::json;
 use tracing::debug;
 
 use crate::analytics::Analytics;
@@ -17,16 +18,14 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
 }
 
-crate::empty_analytics!(DumpAnalytics, "Dump Created");
-
 pub async fn create_dump(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
     auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-    analytics.publish(DumpAnalytics::default(), &req);
+    analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
 
     let task = KindWithContent::DumpCreation {
         keys: auth_controller.list_keys()?,
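The deleted `crate::empty_analytics!(DumpAnalytics, "Dump Created");` call expands, per the macro body shown in the mod.rs hunk above, to roughly the following (paths simplified: the real expansion targets `$crate::analytics::Aggregate`):

```rust
// Hand-expanded sketch of `empty_analytics!(DumpAnalytics, "Dump Created")`.
trait Aggregate {
    fn event_name(&self) -> &'static str;
    fn aggregate(self: Box<Self>, other: Box<Self>) -> Box<Self>
    where
        Self: Sized;
    fn into_event(self: Box<Self>) -> serde_json::Value;
}

#[derive(Default)]
struct DumpAnalytics {}

impl Aggregate for DumpAnalytics {
    fn event_name(&self) -> &'static str {
        "Dump Created"
    }

    // Nothing to merge: the event carries no payload, so keep the first one.
    fn aggregate(self: Box<Self>, _other: Box<Self>) -> Box<Self> {
        self
    }

    fn into_event(self: Box<Self>) -> serde_json::Value {
        serde_json::json!({})
    }
}

fn main() {
    let a = Box::new(DumpAnalytics::default());
    let b = Box::new(DumpAnalytics::default());
    let merged = a.aggregate(b);
    assert_eq!(merged.event_name(), "Dump Created");
    assert_eq!(merged.into_event(), serde_json::json!({}));
}
```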
@@ -6,10 +6,10 @@ use index_scheduler::IndexScheduler;
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::keys::actions;
-use serde::Serialize;
+use serde_json::json;
 use tracing::debug;
 
-use crate::analytics::{Aggregate, Analytics};
+use crate::analytics::Analytics;
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
@@ -17,7 +17,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(
         web::resource("")
-            .route(web::get().to(get_features))
+            .route(web::get().to(SeqHandler(get_features)))
             .route(web::patch().to(SeqHandler(patch_features))),
     );
 }
@@ -27,9 +27,12 @@ async fn get_features(
         ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>,
         Data<IndexScheduler>,
     >,
+    req: HttpRequest,
+    analytics: Data<dyn Analytics>,
 ) -> HttpResponse {
     let features = index_scheduler.features();
 
+    analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
     let features = features.runtime_features();
     debug!(returns = ?features, "Get features");
     HttpResponse::Ok().json(features)
@@ -50,35 +53,6 @@ pub struct RuntimeTogglableFeatures {
     pub contains_filter: Option<bool>,
 }
 
-#[derive(Serialize)]
-pub struct PatchExperimentalFeatureAnalytics {
-    vector_store: bool,
-    metrics: bool,
-    logs_route: bool,
-    edit_documents_by_function: bool,
-    contains_filter: bool,
-}
-
-impl Aggregate for PatchExperimentalFeatureAnalytics {
-    fn event_name(&self) -> &'static str {
-        "Experimental features Updated"
-    }
-
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        Box::new(Self {
-            vector_store: new.vector_store,
-            metrics: new.metrics,
-            logs_route: new.logs_route,
-            edit_documents_by_function: new.edit_documents_by_function,
-            contains_filter: new.contains_filter,
-        })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        serde_json::to_value(*self).unwrap_or_default()
-    }
-}
-
 async fn patch_features(
     index_scheduler: GuardedData<
         ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>,
@@ -86,7 +60,7 @@ async fn patch_features(
     >,
     new_features: AwebJson<RuntimeTogglableFeatures, DeserrJsonError>,
     req: HttpRequest,
-    analytics: Data<Analytics>,
+    analytics: Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let features = index_scheduler.features();
     debug!(parameters = ?new_features, "Patch features");
@@ -115,14 +89,15 @@ async fn patch_features(
     } = new_features;
 
     analytics.publish(
-        PatchExperimentalFeatureAnalytics {
-            vector_store,
-            metrics,
-            logs_route,
-            edit_documents_by_function,
-            contains_filter,
-        },
-        &req,
+        "Experimental features Updated".to_string(),
+        json!({
+            "vector_store": vector_store,
+            "metrics": metrics,
+            "logs_route": logs_route,
+            "edit_documents_by_function": edit_documents_by_function,
+            "contains_filter": contains_filter,
+        }),
+        Some(&req),
     );
     index_scheduler.put_runtime_features(new_features)?;
     debug!(returns = ?new_features, "Patch features");
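This hunk shows the two publish styles side by side: the typed form needs one `Aggregate` struct per event, while the restored form takes an event name plus an ad-hoc JSON payload. A minimal sketch of the untyped call shape (a free function standing in for the trait method):

```rust
use serde_json::{json, Value};

// control-be style: callers pass an event name and a raw JSON payload, so no
// dedicated struct per event is needed.
fn publish_untyped(event_name: String, send: Value) {
    println!("{event_name}: {send}");
}

fn main() {
    publish_untyped(
        "Experimental features Updated".to_string(),
        json!({ "vector_store": false, "metrics": true }),
    );
}
```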
@@ -1,6 +1,4 @@
-use std::collections::HashSet;
 use std::io::ErrorKind;
-use std::marker::PhantomData;
 
 use actix_web::http::header::CONTENT_TYPE;
 use actix_web::web::Data;
@@ -25,14 +23,14 @@ use meilisearch_types::tasks::KindWithContent;
 use meilisearch_types::{milli, Document, Index};
 use mime::Mime;
 use once_cell::sync::Lazy;
-use serde::{Deserialize, Serialize};
+use serde::Deserialize;
 use serde_json::Value;
 use tempfile::tempfile;
 use tokio::fs::File;
 use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
 use tracing::debug;
 
-use crate::analytics::{Aggregate, AggregateMethod, Analytics};
+use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
 use crate::error::MeilisearchHttpError;
 use crate::error::PayloadError::ReceivePayload;
 use crate::extractors::authentication::policies::*;
@@ -43,7 +41,7 @@ use crate::routes::{
     get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
 };
 use crate::search::{parse_filter, RetrieveVectors};
-use crate::{aggregate_methods, Opt};
+use crate::Opt;
 
 static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
     vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
@@ -102,84 +100,12 @@ pub struct GetDocument {
     retrieve_vectors: Param<bool>,
 }
 
-aggregate_methods!(
-    DocumentsGET => "Documents Fetched GET",
-    DocumentsPOST => "Documents Fetched POST",
-);
-
-#[derive(Serialize)]
-pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
-    // a call on ../documents/:doc_id
-    per_document_id: bool,
-    // if a filter was used
-    per_filter: bool,
-
-    #[serde(rename = "vector.retrieve_vectors")]
-    retrieve_vectors: bool,
-
-    // pagination
-    #[serde(rename = "pagination.max_limit")]
-    max_limit: usize,
-    #[serde(rename = "pagination.max_offset")]
-    max_offset: usize,
-
-    marker: std::marker::PhantomData<Method>,
-}
-
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum DocumentFetchKind {
-    PerDocumentId { retrieve_vectors: bool },
-    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
-}
-
-impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
-    pub fn from_query(query: &DocumentFetchKind) -> Self {
-        let (limit, offset, retrieve_vectors) = match query {
-            DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
-            DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
-                (*limit, *offset, *retrieve_vectors)
-            }
-        };
-
-        Self {
-            per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
-            per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
-            max_limit: limit,
-            max_offset: offset,
-            retrieve_vectors,
-
-            marker: PhantomData,
-        }
-    }
-}
-
-impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
-    fn event_name(&self) -> &'static str {
-        Method::event_name()
-    }
-
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        Box::new(Self {
-            per_document_id: self.per_document_id | new.per_document_id,
-            per_filter: self.per_filter | new.per_filter,
-            retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
-            max_limit: self.max_limit.max(new.max_limit),
-            max_offset: self.max_offset.max(new.max_offset),
-            marker: PhantomData,
-        })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        serde_json::to_value(*self).unwrap_or_default()
-    }
-}
-
 pub async fn get_document(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
     document_param: web::Path<DocumentParam>,
     params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
     req: HttpRequest,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let DocumentParam { index_uid, document_id } = document_param.into_inner();
     debug!(parameters = ?params, "Get document");
@@ -191,15 +117,8 @@ pub async fn get_document(
     let features = index_scheduler.features();
     let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?;
 
-    analytics.publish(
-        DocumentsFetchAggregator::<DocumentsGET> {
-            retrieve_vectors: param_retrieve_vectors.0,
-            per_document_id: true,
-            per_filter: false,
-            max_limit: 0,
-            max_offset: 0,
-            marker: PhantomData,
-        },
+    analytics.get_fetch_documents(
+        &DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 },
         &req,
     );
 
@@ -210,52 +129,17 @@ pub async fn get_document(
     Ok(HttpResponse::Ok().json(document))
 }
 
-#[derive(Serialize)]
-pub struct DocumentsDeletionAggregator {
-    per_document_id: bool,
-    clear_all: bool,
-    per_batch: bool,
-    per_filter: bool,
-}
-
-impl Aggregate for DocumentsDeletionAggregator {
-    fn event_name(&self) -> &'static str {
-        "Documents Deleted"
-    }
-
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        Box::new(Self {
-            per_document_id: self.per_document_id | new.per_document_id,
-            clear_all: self.clear_all | new.clear_all,
-            per_batch: self.per_batch | new.per_batch,
-            per_filter: self.per_filter | new.per_filter,
-        })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        serde_json::to_value(*self).unwrap_or_default()
-    }
-}
-
 pub async fn delete_document(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
     path: web::Path<DocumentParam>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let DocumentParam { index_uid, document_id } = path.into_inner();
     let index_uid = IndexUid::try_from(index_uid)?;
 
-    analytics.publish(
-        DocumentsDeletionAggregator {
-            per_document_id: true,
-            clear_all: false,
-            per_batch: false,
-            per_filter: false,
-        },
-        &req,
-    );
+    analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req);
 
     let task = KindWithContent::DocumentDeletion {
         index_uid: index_uid.to_string(),
@@ -306,19 +190,17 @@ pub async fn documents_by_query_post(
     index_uid: web::Path<String>,
     body: AwebJson<BrowseQuery, DeserrJsonError>,
     req: HttpRequest,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let body = body.into_inner();
     debug!(parameters = ?body, "Get documents POST");
 
-    analytics.publish(
-        DocumentsFetchAggregator::<DocumentsPOST> {
-            per_filter: body.filter.is_some(),
+    analytics.post_fetch_documents(
+        &DocumentFetchKind::Normal {
+            with_filter: body.filter.is_some(),
+            limit: body.limit,
+            offset: body.offset,
             retrieve_vectors: body.retrieve_vectors,
-            max_limit: body.limit,
-            max_offset: body.offset,
-            per_document_id: false,
-            marker: PhantomData,
         },
         &req,
     );
@@ -331,7 +213,7 @@ pub async fn get_documents(
     index_uid: web::Path<String>,
     params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>,
     req: HttpRequest,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!(parameters = ?params, "Get documents GET");
 
@@ -353,14 +235,12 @@ pub async fn get_documents(
         filter,
     };
 
-    analytics.publish(
-        DocumentsFetchAggregator::<DocumentsGET> {
-            per_filter: query.filter.is_some(),
+    analytics.get_fetch_documents(
+        &DocumentFetchKind::Normal {
+            with_filter: query.filter.is_some(),
+            limit: query.limit,
+            offset: query.offset,
             retrieve_vectors: query.retrieve_vectors,
-            max_limit: query.limit,
-            max_offset: query.offset,
-            per_document_id: false,
-            marker: PhantomData,
         },
         &req,
     );
@@ -418,39 +298,6 @@ fn from_char_csv_delimiter(
     }
 }
 
-aggregate_methods!(
-    Replaced => "Documents Added",
-    Updated => "Documents Updated",
-);
-
-#[derive(Serialize)]
-pub struct DocumentsAggregator<T: AggregateMethod> {
-    payload_types: HashSet<String>,
-    primary_key: HashSet<String>,
-    index_creation: bool,
-    #[serde(skip)]
-    method: PhantomData<T>,
-}
-
-impl<Method: AggregateMethod> Aggregate for DocumentsAggregator<Method> {
-    fn event_name(&self) -> &'static str {
-        Method::event_name()
-    }
-
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        Box::new(Self {
-            payload_types: self.payload_types.union(&new.payload_types).cloned().collect(),
-            primary_key: self.primary_key.union(&new.primary_key).cloned().collect(),
-            index_creation: self.index_creation | new.index_creation,
-            method: PhantomData,
-        })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        serde_json::to_value(self).unwrap_or_default()
-    }
-}
-
 pub async fn replace_documents(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
     index_uid: web::Path<String>,
@@ -458,32 +305,16 @@ pub async fn replace_documents(
     body: Payload,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 
     debug!(parameters = ?params, "Replace documents");
     let params = params.into_inner();
 
-    let mut content_types = HashSet::new();
-    let content_type = req
-        .headers()
-        .get(CONTENT_TYPE)
-        .and_then(|s| s.to_str().ok())
-        .unwrap_or("unknown")
-        .to_string();
-    content_types.insert(content_type);
-    let mut primary_keys = HashSet::new();
-    if let Some(primary_key) = params.primary_key.clone() {
-        primary_keys.insert(primary_key);
-    }
-    analytics.publish(
-        DocumentsAggregator::<Replaced> {
-            payload_types: content_types,
-            primary_key: primary_keys,
-            index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
-            method: PhantomData,
-        },
+    analytics.add_documents(
+        &params,
+        index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
         &req,
     );
 
@@ -515,32 +346,16 @@ pub async fn update_documents(
     body: Payload,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 
     let params = params.into_inner();
     debug!(parameters = ?params, "Update documents");
 
-    let mut content_types = HashSet::new();
-    let content_type = req
-        .headers()
-        .get(CONTENT_TYPE)
-        .and_then(|s| s.to_str().ok())
-        .unwrap_or("unknown")
-        .to_string();
-    content_types.insert(content_type);
-    let mut primary_keys = HashSet::new();
-    if let Some(primary_key) = params.primary_key.clone() {
-        primary_keys.insert(primary_key);
-    }
-    analytics.publish(
-        DocumentsAggregator::<Updated> {
-            payload_types: content_types,
-            primary_key: primary_keys,
-            index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
-            method: PhantomData,
-        },
+    analytics.add_documents(
+        &params,
+        index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
        &req,
     );
 
@@ -709,20 +524,12 @@ pub async fn delete_documents_batch(
     body: web::Json<Vec<Value>>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!(parameters = ?body, "Delete documents by batch");
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 
-    analytics.publish(
-        DocumentsDeletionAggregator {
-            per_batch: true,
-            per_document_id: false,
-            clear_all: false,
-            per_filter: false,
-        },
-        &req,
-    );
+    analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
 
     let ids = body
         .iter()
@@ -755,22 +562,14 @@ pub async fn delete_documents_by_filter(
     body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!(parameters = ?body, "Delete documents by filter");
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
     let index_uid = index_uid.into_inner();
     let filter = body.into_inner().filter;
 
-    analytics.publish(
-        DocumentsDeletionAggregator {
-            per_filter: true,
-            per_document_id: false,
-            clear_all: false,
-            per_batch: false,
-        },
-        &req,
-    );
+    analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
 
     // we ensure the filter is well formed before enqueuing it
     crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
@@ -800,41 +599,13 @@ pub struct DocumentEditionByFunction {
     pub function: String,
 }
 
-#[derive(Serialize)]
-struct EditDocumentsByFunctionAggregator {
-    // Set to true if at least one request was filtered
-    filtered: bool,
-    // Set to true if at least one request contained a context
-    with_context: bool,
-
-    index_creation: bool,
-}
-
-impl Aggregate for EditDocumentsByFunctionAggregator {
-    fn event_name(&self) -> &'static str {
-        "Documents Edited By Function"
-    }
-
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        Box::new(Self {
-            filtered: self.filtered | new.filtered,
-            with_context: self.with_context | new.with_context,
-            index_creation: self.index_creation | new.index_creation,
-        })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        serde_json::to_value(*self).unwrap_or_default()
-    }
-}
-
 pub async fn edit_documents_by_function(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
     index_uid: web::Path<String>,
     params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!(parameters = ?params, "Edit documents by function");
 
@@ -846,12 +617,9 @@ pub async fn edit_documents_by_function(
     let index_uid = index_uid.into_inner();
     let params = params.into_inner();
 
-    analytics.publish(
-        EditDocumentsByFunctionAggregator {
-            filtered: params.filter.is_some(),
-            with_context: params.context.is_some(),
-            index_creation: index_scheduler.index(&index_uid).is_err(),
-        },
+    analytics.update_documents_by_function(
+        &params,
+        index_scheduler.index(&index_uid).is_err(),
         &req,
     );
 
@@ -902,18 +670,10 @@ pub async fn clear_all_documents(
     index_uid: web::Path<String>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
-    analytics.publish(
-        DocumentsDeletionAggregator {
-            clear_all: true,
-            per_document_id: false,
-            per_batch: false,
-            per_filter: false,
-        },
-        &req,
-    );
+    analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
 
     let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
     let uid = get_task_id(&req, &opt)?;
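The removed `DocumentsFetchAggregator` merged events with boolean OR ("was a filter ever used?") and numeric max ("largest limit/offset seen across the batch"). Those merge semantics in isolation, as a runnable sketch (field names follow the diff; the struct itself is illustrative):

```rust
// Merge rules as used by the removed aggregator: flags are OR-ed, pagination
// extremes keep the maximum observed value.
#[derive(Debug, PartialEq)]
struct FetchStats {
    per_filter: bool,
    max_limit: usize,
    max_offset: usize,
}

impl FetchStats {
    fn aggregate(self, new: FetchStats) -> FetchStats {
        FetchStats {
            per_filter: self.per_filter | new.per_filter,
            max_limit: self.max_limit.max(new.max_limit),
            max_offset: self.max_offset.max(new.max_offset),
        }
    }
}

fn main() {
    let a = FetchStats { per_filter: false, max_limit: 20, max_offset: 0 };
    let b = FetchStats { per_filter: true, max_limit: 5, max_offset: 100 };
    assert_eq!(
        a.aggregate(b),
        FetchStats { per_filter: true, max_limit: 20, max_offset: 100 }
    );
}
```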
@@ -1,5 +1,3 @@
|
|||||||
use std::collections::{BinaryHeap, HashSet};
|
|
||||||
|
|
||||||
use actix_web::web::Data;
|
use actix_web::web::Data;
|
||||||
use actix_web::{web, HttpRequest, HttpResponse};
|
use actix_web::{web, HttpRequest, HttpResponse};
|
||||||
use deserr::actix_web::AwebJson;
|
use deserr::actix_web::AwebJson;
|
||||||
@@ -12,15 +10,14 @@ use meilisearch_types::locales::Locale;
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use crate::analytics::{Aggregate, Analytics};
|
use crate::analytics::{Analytics, FacetSearchAggregator};
|
||||||
use crate::extractors::authentication::policies::*;
|
use crate::extractors::authentication::policies::*;
|
||||||
use crate::extractors::authentication::GuardedData;
|
use crate::extractors::authentication::GuardedData;
|
||||||
use crate::routes::indexes::search::search_kind;
|
use crate::routes::indexes::search::search_kind;
|
||||||
use crate::search::{
|
use crate::search::{
|
||||||
add_search_rules, perform_facet_search, FacetSearchResult, HybridQuery, MatchingStrategy,
|
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||||
RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||||
DEFAULT_SEARCH_OFFSET,
|
|
||||||
};
|
};
|
||||||
use crate::search_queue::SearchQueue;
|
use crate::search_queue::SearchQueue;
|
||||||
|
|
||||||
@@ -56,122 +53,20 @@ pub struct FacetSearchQuery {
     pub locales: Option<Vec<Locale>>,
 }

-#[derive(Default)]
-pub struct FacetSearchAggregator {
-    // requests
-    total_received: usize,
-    total_succeeded: usize,
-    time_spent: BinaryHeap<usize>,
-
-    // The set of all facetNames that were used
-    facet_names: HashSet<String>,
-
-    // As there been any other parameter than the facetName or facetQuery ones?
-    additional_search_parameters_provided: bool,
-}
-
-impl FacetSearchAggregator {
-    #[allow(clippy::field_reassign_with_default)]
-    pub fn from_query(query: &FacetSearchQuery) -> Self {
-        let FacetSearchQuery {
-            facet_query: _,
-            facet_name,
-            vector,
-            q,
-            filter,
-            matching_strategy,
-            attributes_to_search_on,
-            hybrid,
-            ranking_score_threshold,
-            locales,
-        } = query;
-
-        Self {
-            total_received: 1,
-            facet_names: Some(facet_name.clone()).into_iter().collect(),
-            additional_search_parameters_provided: q.is_some()
-                || vector.is_some()
-                || filter.is_some()
-                || *matching_strategy != MatchingStrategy::default()
-                || attributes_to_search_on.is_some()
-                || hybrid.is_some()
-                || ranking_score_threshold.is_some()
-                || locales.is_some(),
-            ..Default::default()
-        }
-    }
-
-    pub fn succeed(&mut self, result: &FacetSearchResult) {
-        let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result;
-        self.total_succeeded = 1;
-        self.time_spent.push(*processing_time_ms as usize);
-    }
-}
-
-impl Aggregate for FacetSearchAggregator {
-    fn event_name(&self) -> &'static str {
-        "Facet Searched POST"
-    }
-
-    fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        for time in new.time_spent {
-            self.time_spent.push(time);
-        }
-
-        Box::new(Self {
-            total_received: self.total_received.saturating_add(new.total_received),
-            total_succeeded: self.total_succeeded.saturating_add(new.total_succeeded),
-            time_spent: self.time_spent,
-            facet_names: self.facet_names.union(&new.facet_names).cloned().collect(),
-            additional_search_parameters_provided: self.additional_search_parameters_provided
-                | new.additional_search_parameters_provided,
-        })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        let Self {
-            total_received,
-            total_succeeded,
-            time_spent,
-            facet_names,
-            additional_search_parameters_provided,
-        } = *self;
-        // the index of the 99th percentage of value
-        let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) + 1.;
-        // we get all the values in a sorted manner
-        let time_spent = time_spent.into_sorted_vec();
-        // We are only interested by the slowest value of the 99th fastest results
-        let time_spent = time_spent.get(percentile_99th as usize);
-
-        serde_json::json!({
-            "requests": {
-                "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
-                "total_succeeded": total_succeeded,
-                "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
-                "total_received": total_received,
-            },
-            "facets": {
-                "total_distinct_facet_count": facet_names.len(),
-                "additional_search_parameters_provided": additional_search_parameters_provided,
-            },
-        })
-    }
-}
-
 pub async fn search(
     index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
     search_queue: Data<SearchQueue>,
     index_uid: web::Path<String>,
     params: AwebJson<FacetSearchQuery, DeserrJsonError>,
     req: HttpRequest,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;

     let query = params.into_inner();
     debug!(parameters = ?query, "Facet search");

-    let mut aggregate = FacetSearchAggregator::from_query(&query);
+    let mut aggregate = FacetSearchAggregator::from_query(&query, &req);

     let facet_query = query.facet_query.clone();
     let facet_name = query.facet_name.clone();
@@ -205,7 +100,7 @@ pub async fn search(
     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
     }
-    analytics.publish(aggregate, &req);
+    analytics.post_facet_search(aggregate);

     let search_result = search_result?;

@@ -1,4 +1,3 @@
-use std::collections::BTreeSet;
 use std::convert::Infallible;

 use actix_web::web::Data;
@@ -14,11 +13,12 @@ use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::{self, FieldDistribution, Index};
 use meilisearch_types::tasks::KindWithContent;
 use serde::Serialize;
+use serde_json::json;
 use time::OffsetDateTime;
 use tracing::debug;

 use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
-use crate::analytics::{Aggregate, Analytics};
+use crate::analytics::Analytics;
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::{AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
@@ -28,11 +28,8 @@ use crate::Opt;
 pub mod documents;
 pub mod facet_search;
 pub mod search;
-mod search_analytics;
 pub mod settings;
-mod settings_analytics;
 pub mod similar;
-mod similar_analytics;

 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(
@@ -126,31 +123,12 @@ pub struct IndexCreateRequest {
     primary_key: Option<String>,
 }

-#[derive(Serialize)]
-struct IndexCreatedAggregate {
-    primary_key: BTreeSet<String>,
-}
-
-impl Aggregate for IndexCreatedAggregate {
-    fn event_name(&self) -> &'static str {
-        "Index Created"
-    }
-
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        serde_json::to_value(*self).unwrap_or_default()
-    }
-}
-
 pub async fn create_index(
     index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
     body: AwebJson<IndexCreateRequest, DeserrJsonError>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!(parameters = ?body, "Create index");
     let IndexCreateRequest { primary_key, uid } = body.into_inner();
@@ -158,8 +136,9 @@ pub async fn create_index(
     let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
     if allow_index_creation {
         analytics.publish(
-            IndexCreatedAggregate { primary_key: primary_key.iter().cloned().collect() },
-            &req,
+            "Index Created".to_string(),
+            json!({ "primary_key": primary_key }),
+            Some(&req),
         );

         let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
@@ -215,38 +194,21 @@ pub async fn get_index(
     Ok(HttpResponse::Ok().json(index_view))
 }

-#[derive(Serialize)]
-struct IndexUpdatedAggregate {
-    primary_key: BTreeSet<String>,
-}
-
-impl Aggregate for IndexUpdatedAggregate {
-    fn event_name(&self) -> &'static str {
-        "Index Updated"
-    }
-
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        serde_json::to_value(*self).unwrap_or_default()
-    }
-}
 pub async fn update_index(
     index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
     index_uid: web::Path<String>,
     body: AwebJson<UpdateIndexRequest, DeserrJsonError>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!(parameters = ?body, "Update index");
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
     let body = body.into_inner();
     analytics.publish(
-        IndexUpdatedAggregate { primary_key: body.primary_key.iter().cloned().collect() },
-        &req,
+        "Index Updated".to_string(),
+        json!({ "primary_key": body.primary_key }),
+        Some(&req),
     );

     let task = KindWithContent::IndexUpdate {
@@ -13,13 +13,12 @@ use meilisearch_types::serde_cs::vec::CS;
 use serde_json::Value;
 use tracing::debug;

-use crate::analytics::Analytics;
+use crate::analytics::{Analytics, SearchAggregator};
 use crate::error::MeilisearchHttpError;
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
-use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
 use crate::search::{
     add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
     RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
@@ -129,10 +128,8 @@ impl std::ops::Deref for SemanticRatioGet {
     }
 }

-impl TryFrom<SearchQueryGet> for SearchQuery {
-    type Error = ResponseError;
-
-    fn try_from(other: SearchQueryGet) -> Result<Self, Self::Error> {
+impl From<SearchQueryGet> for SearchQuery {
+    fn from(other: SearchQueryGet) -> Self {
         let filter = match other.filter {
             Some(f) => match serde_json::from_str(&f) {
                 Ok(v) => Some(v),
@@ -143,28 +140,19 @@ impl TryFrom<SearchQueryGet> for SearchQuery {

         let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
             (None, None) => None,
-            (None, Some(_)) => {
-                return Err(ResponseError::from_msg(
-                    "`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(),
-                    meilisearch_types::error::Code::InvalidHybridQuery,
-                ));
-            }
-            (Some(embedder), None) => {
-                Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder })
+            (None, Some(semantic_ratio)) => {
+                Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None })
             }
+            (Some(embedder), None) => Some(HybridQuery {
+                semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
+                embedder: Some(embedder),
+            }),
             (Some(embedder), Some(semantic_ratio)) => {
-                Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder })
+                Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) })
             }
         };

-        if other.vector.is_some() && hybrid.is_none() {
-            return Err(ResponseError::from_msg(
-                "`hybridEmbedder` is mandatory when `vector` is present".into(),
-                meilisearch_types::error::Code::MissingSearchHybrid,
-            ));
-        }
-
-        Ok(Self {
+        Self {
             q: other.q,
             vector: other.vector.map(CS::into_inner),
             offset: other.offset.0,
@@ -191,7 +179,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
             hybrid,
             ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
             locales: other.locales.map(|o| o.into_iter().collect()),
-        })
+        }
     }
 }

@@ -226,19 +214,19 @@ pub async fn search_with_url_query(
     index_uid: web::Path<String>,
     params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
     req: HttpRequest,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!(parameters = ?params, "Search get");
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;

-    let mut query: SearchQuery = params.into_inner().try_into()?;
+    let mut query: SearchQuery = params.into_inner().into();

     // Tenant token search_rules.
     if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
         add_search_rules(&mut query.filter, search_rules);
     }

-    let mut aggregate = SearchAggregator::<SearchGET>::from_query(&query);
+    let mut aggregate = SearchAggregator::from_query(&query, &req);

     let index = index_scheduler.index(&index_uid)?;
     let features = index_scheduler.features();
@@ -255,7 +243,7 @@ pub async fn search_with_url_query(
     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
     }
-    analytics.publish(aggregate, &req);
+    analytics.get_search(aggregate);

     let search_result = search_result?;

@@ -269,7 +257,7 @@ pub async fn search_with_post(
     index_uid: web::Path<String>,
     params: AwebJson<SearchQuery, DeserrJsonError>,
     req: HttpRequest,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;

@@ -281,7 +269,7 @@ pub async fn search_with_post(
         add_search_rules(&mut query.filter, search_rules);
     }

-    let mut aggregate = SearchAggregator::<SearchPOST>::from_query(&query);
+    let mut aggregate = SearchAggregator::from_query(&query, &req);

     let index = index_scheduler.index(&index_uid)?;

@@ -303,7 +291,7 @@ pub async fn search_with_post(
             MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
         }
     }
-    analytics.publish(aggregate, &req);
+    analytics.post_search(aggregate);

     let search_result = search_result?;

@@ -324,36 +312,44 @@ pub fn search_kind(
         features.check_vector("Passing `hybrid` as a parameter")?;
     }

-    // handle with care, the order of cases matters, the semantics is subtle
-    match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
-        // empty query, no vector => placeholder search
-        (Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
-        // no query, no vector => placeholder search
-        (None, _, None) => Ok(SearchKind::KeywordOnly),
-        // hybrid.semantic_ratio == 1.0 => vector
-        (_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
-            SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
+    // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing
+    if query.vector.is_none() {
+        match &query.q {
+            Some(q) if q.trim().is_empty() => return Ok(SearchKind::KeywordOnly),
+            None => return Ok(SearchKind::KeywordOnly),
+            _ => {}
         }
-        // hybrid.semantic_ratio == 0.0 => keyword
-        (_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
-            Ok(SearchKind::KeywordOnly)
     }
-        // no query, hybrid, vector => semantic
-        (None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
-            SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
-        }
-        // query, no hybrid, no vector => keyword
-        (Some(_), None, None) => Ok(SearchKind::KeywordOnly),
-        // query, hybrid, maybe vector => hybrid
-        (Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
+
+    match &query.hybrid {
+        Some(HybridQuery { semantic_ratio, embedder }) if **semantic_ratio == 1.0 => {
+            Ok(SearchKind::semantic(
            index_scheduler,
            index,
-            embedder,
+                embedder.as_deref(),
+                query.vector.as_ref().map(Vec::len),
+            )?)
+        }
+        Some(HybridQuery { semantic_ratio, embedder: _ }) if **semantic_ratio == 0.0 => {
+            Ok(SearchKind::KeywordOnly)
+        }
+        Some(HybridQuery { semantic_ratio, embedder }) => Ok(SearchKind::hybrid(
+            index_scheduler,
+            index,
+            embedder.as_deref(),
             **semantic_ratio,
-            v.map(|v| v.len()),
-        ),
-        (_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
+            query.vector.as_ref().map(Vec::len),
+        )?),
+        None => match (query.q.as_deref(), query.vector.as_deref()) {
+            (_query, None) => Ok(SearchKind::KeywordOnly),
+            (None, Some(_vector)) => Ok(SearchKind::semantic(
+                index_scheduler,
+                index,
+                None,
+                query.vector.as_ref().map(Vec::len),
+            )?),
+            (Some(_), Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
+        },
     }
 }

@@ -1,485 +0,0 @@
-use once_cell::sync::Lazy;
-use regex::Regex;
-use serde_json::{json, Value};
-use std::collections::{BTreeSet, BinaryHeap, HashMap};
-
-use meilisearch_types::locales::Locale;
-
-use crate::{
-    aggregate_methods,
-    analytics::{Aggregate, AggregateMethod},
-    search::{
-        SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-        DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
-        DEFAULT_SEMANTIC_RATIO,
-    },
-};
-
-aggregate_methods!(
-    SearchGET => "Documents Searched GET",
-    SearchPOST => "Documents Searched POST",
-);
-
-#[derive(Default)]
-pub struct SearchAggregator<Method: AggregateMethod> {
-    // requests
-    total_received: usize,
-    total_succeeded: usize,
-    total_degraded: usize,
-    total_used_negative_operator: usize,
-    time_spent: BinaryHeap<usize>,
-
-    // sort
-    sort_with_geo_point: bool,
-    // every time a request has a filter, this field must be incremented by the number of terms it contains
-    sort_sum_of_criteria_terms: usize,
-    // every time a request has a filter, this field must be incremented by one
-    sort_total_number_of_criteria: usize,
-
-    // distinct
-    distinct: bool,
-
-    // filter
-    filter_with_geo_radius: bool,
-    filter_with_geo_bounding_box: bool,
-    // every time a request has a filter, this field must be incremented by the number of terms it contains
-    filter_sum_of_criteria_terms: usize,
-    // every time a request has a filter, this field must be incremented by one
-    filter_total_number_of_criteria: usize,
-    used_syntax: HashMap<String, usize>,
-
-    // attributes_to_search_on
-    // every time a search is done using attributes_to_search_on
-    attributes_to_search_on_total_number_of_uses: usize,
-
-    // q
-    // The maximum number of terms in a q request
-    max_terms_number: usize,
-
-    // vector
-    // The maximum number of floats in a vector request
-    max_vector_size: usize,
-    // Whether the semantic ratio passed to a hybrid search equals the default ratio.
-    semantic_ratio: bool,
-    hybrid: bool,
-    retrieve_vectors: bool,
-
-    // every time a search is done, we increment the counter linked to the used settings
-    matching_strategy: HashMap<String, usize>,
-
-    // List of the unique Locales passed as parameter
-    locales: BTreeSet<Locale>,
-
-    // pagination
-    max_limit: usize,
-    max_offset: usize,
-    finite_pagination: usize,
-
-    // formatting
-    max_attributes_to_retrieve: usize,
-    max_attributes_to_highlight: usize,
-    highlight_pre_tag: bool,
-    highlight_post_tag: bool,
-    max_attributes_to_crop: usize,
-    crop_marker: bool,
-    show_matches_position: bool,
-    crop_length: bool,
-
-    // facets
-    facets_sum_of_terms: usize,
-    facets_total_number_of_facets: usize,
-
-    // scoring
-    show_ranking_score: bool,
-    show_ranking_score_details: bool,
-    ranking_score_threshold: bool,
-
-    marker: std::marker::PhantomData<Method>,
-}
-
-impl<Method: AggregateMethod> SearchAggregator<Method> {
-    #[allow(clippy::field_reassign_with_default)]
-    pub fn from_query(query: &SearchQuery) -> Self {
-        let SearchQuery {
-            q,
-            vector,
-            offset,
-            limit,
-            page,
-            hits_per_page,
-            attributes_to_retrieve: _,
-            retrieve_vectors,
-            attributes_to_crop: _,
-            crop_length,
-            attributes_to_highlight: _,
-            show_matches_position,
-            show_ranking_score,
-            show_ranking_score_details,
-            filter,
-            sort,
-            distinct,
-            facets: _,
-            highlight_pre_tag,
-            highlight_post_tag,
-            crop_marker,
-            matching_strategy,
-            attributes_to_search_on,
-            hybrid,
-            ranking_score_threshold,
-            locales,
-        } = query;
-
-        let mut ret = Self::default();
-
-        ret.total_received = 1;
-
-        if let Some(ref sort) = sort {
-            ret.sort_total_number_of_criteria = 1;
-            ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint("));
-            ret.sort_sum_of_criteria_terms = sort.len();
-        }
-
-        ret.distinct = distinct.is_some();
-
-        if let Some(ref filter) = filter {
-            static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
-            ret.filter_total_number_of_criteria = 1;
-
-            let syntax = match filter {
-                Value::String(_) => "string".to_string(),
-                Value::Array(values) => {
-                    if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
-                        "mixed".to_string()
-                    } else {
-                        "array".to_string()
-                    }
-                }
-                _ => "none".to_string(),
-            };
-            // convert the string to a HashMap
-            ret.used_syntax.insert(syntax, 1);
-
-            let stringified_filters = filter.to_string();
-            ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
-            ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
-            ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
-        }
-
-        // attributes_to_search_on
-        if attributes_to_search_on.is_some() {
-            ret.attributes_to_search_on_total_number_of_uses = 1;
-        }
-
-        if let Some(ref q) = q {
-            ret.max_terms_number = q.split_whitespace().count();
-        }
-
-        if let Some(ref vector) = vector {
-            ret.max_vector_size = vector.len();
-        }
-        ret.retrieve_vectors |= retrieve_vectors;
-
-        if query.is_finite_pagination() {
-            let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
-            ret.max_limit = limit;
-            ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit;
-            ret.finite_pagination = 1;
-        } else {
-            ret.max_limit = *limit;
-            ret.max_offset = *offset;
-            ret.finite_pagination = 0;
-        }
-
-        ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1);
-
-        if let Some(locales) = locales {
-            ret.locales = locales.iter().copied().collect();
-        }
-
-        ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
-        ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
-        ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
-        ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH();
-        ret.show_matches_position = *show_matches_position;
-
-        ret.show_ranking_score = *show_ranking_score;
-        ret.show_ranking_score_details = *show_ranking_score_details;
-        ret.ranking_score_threshold = ranking_score_threshold.is_some();
-
-        if let Some(hybrid) = hybrid {
-            ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
-            ret.hybrid = true;
-        }
-
-        ret
-    }
-
-    pub fn succeed(&mut self, result: &SearchResult) {
-        let SearchResult {
-            hits: _,
-            query: _,
-            processing_time_ms,
-            hits_info: _,
-            semantic_hit_count: _,
-            facet_distribution: _,
-            facet_stats: _,
-            degraded,
-            used_negative_operator,
-        } = result;
-
-        self.total_succeeded = self.total_succeeded.saturating_add(1);
-        if *degraded {
-            self.total_degraded = self.total_degraded.saturating_add(1);
-        }
-        if *used_negative_operator {
-            self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1);
-        }
-        self.time_spent.push(*processing_time_ms as usize);
-    }
-}
-
-impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
-    fn event_name(&self) -> &'static str {
-        Method::event_name()
-    }
-
-    fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        let Self {
-            total_received,
-            total_succeeded,
-            mut time_spent,
-            sort_with_geo_point,
-            sort_sum_of_criteria_terms,
-            sort_total_number_of_criteria,
-            distinct,
-            filter_with_geo_radius,
-            filter_with_geo_bounding_box,
-            filter_sum_of_criteria_terms,
-            filter_total_number_of_criteria,
-            used_syntax,
-            attributes_to_search_on_total_number_of_uses,
-            max_terms_number,
-            max_vector_size,
-            retrieve_vectors,
-            matching_strategy,
-            max_limit,
-            max_offset,
-            finite_pagination,
-            max_attributes_to_retrieve,
-            max_attributes_to_highlight,
-            highlight_pre_tag,
-            highlight_post_tag,
-            max_attributes_to_crop,
-            crop_marker,
-            show_matches_position,
-            crop_length,
-            facets_sum_of_terms,
-            facets_total_number_of_facets,
-            show_ranking_score,
-            show_ranking_score_details,
-            semantic_ratio,
-            hybrid,
-            total_degraded,
-            total_used_negative_operator,
-            ranking_score_threshold,
-            mut locales,
-            marker: _,
-        } = *new;
-
-        // request
-        self.total_received = self.total_received.saturating_add(total_received);
-        self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
-        self.total_degraded = self.total_degraded.saturating_add(total_degraded);
-        self.total_used_negative_operator =
-            self.total_used_negative_operator.saturating_add(total_used_negative_operator);
-        self.time_spent.append(&mut time_spent);
-
-        // sort
-        self.sort_with_geo_point |= sort_with_geo_point;
-        self.sort_sum_of_criteria_terms =
-            self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms);
-        self.sort_total_number_of_criteria =
-            self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
-
-        // distinct
-        self.distinct |= distinct;
-
-        // filter
-        self.filter_with_geo_radius |= filter_with_geo_radius;
-        self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
-        self.filter_sum_of_criteria_terms =
-            self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
-        self.filter_total_number_of_criteria =
-            self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
-        for (key, value) in used_syntax.into_iter() {
-            let used_syntax = self.used_syntax.entry(key).or_insert(0);
-            *used_syntax = used_syntax.saturating_add(value);
-        }
-
-        // attributes_to_search_on
-        self.attributes_to_search_on_total_number_of_uses = self
-            .attributes_to_search_on_total_number_of_uses
-            .saturating_add(attributes_to_search_on_total_number_of_uses);
-
-        // q
-        self.max_terms_number = self.max_terms_number.max(max_terms_number);
-
-        // vector
-        self.max_vector_size = self.max_vector_size.max(max_vector_size);
-        self.retrieve_vectors |= retrieve_vectors;
-        self.semantic_ratio |= semantic_ratio;
-        self.hybrid |= hybrid;
-
-        // pagination
-        self.max_limit = self.max_limit.max(max_limit);
-        self.max_offset = self.max_offset.max(max_offset);
-        self.finite_pagination += finite_pagination;
-
-        // formatting
-        self.max_attributes_to_retrieve =
-            self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
-        self.max_attributes_to_highlight =
-            self.max_attributes_to_highlight.max(max_attributes_to_highlight);
-        self.highlight_pre_tag |= highlight_pre_tag;
-        self.highlight_post_tag |= highlight_post_tag;
-        self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop);
-        self.crop_marker |= crop_marker;
-        self.show_matches_position |= show_matches_position;
-        self.crop_length |= crop_length;
-
-        // facets
-        self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms);
-        self.facets_total_number_of_facets =
-            self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets);
-
-        // matching strategy
-        for (key, value) in matching_strategy.into_iter() {
-            let matching_strategy = self.matching_strategy.entry(key).or_insert(0);
-            *matching_strategy = matching_strategy.saturating_add(value);
-        }
-
-        // scoring
-        self.show_ranking_score |= show_ranking_score;
-        self.show_ranking_score_details |= show_ranking_score_details;
-        self.ranking_score_threshold |= ranking_score_threshold;
-
-        // locales
-        self.locales.append(&mut locales);
-
-        self
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        let Self {
-            total_received,
-            total_succeeded,
-            time_spent,
-            sort_with_geo_point,
-            sort_sum_of_criteria_terms,
-            sort_total_number_of_criteria,
-            distinct,
-            filter_with_geo_radius,
-            filter_with_geo_bounding_box,
-            filter_sum_of_criteria_terms,
-            filter_total_number_of_criteria,
-            used_syntax,
-            attributes_to_search_on_total_number_of_uses,
-            max_terms_number,
-            max_vector_size,
-            retrieve_vectors,
-            matching_strategy,
-            max_limit,
-            max_offset,
-            finite_pagination,
-            max_attributes_to_retrieve,
-            max_attributes_to_highlight,
-            highlight_pre_tag,
-            highlight_post_tag,
-            max_attributes_to_crop,
-            crop_marker,
-            show_matches_position,
-            crop_length,
-            facets_sum_of_terms,
-            facets_total_number_of_facets,
-            show_ranking_score,
-            show_ranking_score_details,
-            semantic_ratio,
-            hybrid,
-            total_degraded,
-            total_used_negative_operator,
-            ranking_score_threshold,
-            locales,
-            marker: _,
-        } = *self;
-
-        // we get all the values in a sorted manner
-        let time_spent = time_spent.into_sorted_vec();
-        // the index of the 99th percentage of value
-        let percentile_99th = time_spent.len() * 99 / 100;
-        // We are only interested by the slowest value of the 99th fastest results
-        let time_spent = time_spent.get(percentile_99th);
-
-        json!({
-            "requests": {
-                "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
-                "total_succeeded": total_succeeded,
-                "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
-                "total_received": total_received,
-                "total_degraded": total_degraded,
-                "total_used_negative_operator": total_used_negative_operator,
-            },
-            "sort": {
-                "with_geoPoint": sort_with_geo_point,
-                "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
-            },
-            "distinct": distinct,
-            "filter": {
-                "with_geoRadius": filter_with_geo_radius,
-                "with_geoBoundingBox": filter_with_geo_bounding_box,
-                "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
-                "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
-            },
-            "attributes_to_search_on": {
-                "total_number_of_uses": attributes_to_search_on_total_number_of_uses,
-            },
-            "q": {
-                "max_terms_number": max_terms_number,
-            },
-            "vector": {
-                "max_vector_size": max_vector_size,
-                "retrieve_vectors": retrieve_vectors,
-            },
-            "hybrid": {
-                "enabled": hybrid,
-                "semantic_ratio": semantic_ratio,
-            },
-            "pagination": {
-                "max_limit": max_limit,
-                "max_offset": max_offset,
-                "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" },
-            },
-            "formatting": {
-                "max_attributes_to_retrieve": max_attributes_to_retrieve,
-                "max_attributes_to_highlight": max_attributes_to_highlight,
-                "highlight_pre_tag": highlight_pre_tag,
-                "highlight_post_tag": highlight_post_tag,
-                "max_attributes_to_crop": max_attributes_to_crop,
-                "crop_marker": crop_marker,
-                "show_matches_position": show_matches_position,
-                "crop_length": crop_length,
-            },
-            "facets": {
-                "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64),
-            },
-            "matching_strategy": {
-                "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
-            },
-            "locales": locales,
-            "scoring": {
-                "show_ranking_score": show_ranking_score,
-                "show_ranking_score_details": show_ranking_score_details,
-                "ranking_score_threshold": ranking_score_threshold,
-            },
-        })
-    }
-}
@@ -1,14 +1,15 @@
-use super::settings_analytics::*;
 use actix_web::web::Data;
 use actix_web::{web, HttpRequest, HttpResponse};
 use deserr::actix_web::AwebJson;
 use index_scheduler::IndexScheduler;
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::ResponseError;
+use meilisearch_types::facet_values_sort::FacetValuesSort;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::update::Setting;
-use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked};
+use meilisearch_types::settings::{settings, RankingRuleView, SecretPolicy, Settings, Unchecked};
 use meilisearch_types::tasks::KindWithContent;
+use serde_json::json;
 use tracing::debug;

 use crate::analytics::Analytics;
@@ -19,7 +20,7 @@ use crate::Opt;

 #[macro_export]
 macro_rules! make_setting_route {
-    ($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident) => {
+    ($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => {
        pub mod $attr {
            use actix_web::web::Data;
            use actix_web::{web, HttpRequest, HttpResponse, Resource};
@@ -79,7 +80,7 @@ macro_rules! make_setting_route {
                body: deserr::actix_web::AwebJson<Option<$type>, $err_ty>,
                req: HttpRequest,
                opt: web::Data<Opt>,
-                analytics: web::Data<Analytics>,
+                $analytics_var: web::Data<dyn Analytics>,
            ) -> std::result::Result<HttpResponse, ResponseError> {
                let index_uid = IndexUid::try_from(index_uid.into_inner())?;

@@ -87,10 +88,7 @@ macro_rules! make_setting_route {
                debug!(parameters = ?body, "Update settings");

                #[allow(clippy::redundant_closure_call)]
-                analytics.publish(
-                    $crate::routes::indexes::settings_analytics::$analytics::new(body.as_ref()).into_settings(),
-                    &req,
-                );
+                $analytics(&body, &req);

                let new_settings = Settings {
                    $attr: match body {
@@ -162,7 +160,21 @@ make_setting_route!(
     >,
     filterable_attributes,
     "filterableAttributes",
-    FilterableAttributesAnalytics
+    analytics,
+    |setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "FilterableAttributes Updated".to_string(),
+            json!({
+                "filterable_attributes": {
+                    "total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0),
+                    "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false),
+                }
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -174,7 +186,21 @@ make_setting_route!(
     >,
     sortable_attributes,
     "sortableAttributes",
-    SortableAttributesAnalytics
+    analytics,
+    |setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "SortableAttributes Updated".to_string(),
+            json!({
+                "sortable_attributes": {
+                    "total": setting.as_ref().map(|sort| sort.len()),
+                    "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")),
+                },
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -186,7 +212,21 @@ make_setting_route!(
     >,
     displayed_attributes,
     "displayedAttributes",
-    DisplayedAttributesAnalytics
+    analytics,
+    |displayed: &Option<Vec<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "DisplayedAttributes Updated".to_string(),
+            json!({
+                "displayed_attributes": {
+                    "total": displayed.as_ref().map(|displayed| displayed.len()),
+                    "with_wildcard": displayed.as_ref().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
+                },
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -198,7 +238,40 @@ make_setting_route!(
     >,
     typo_tolerance,
     "typoTolerance",
-    TypoToleranceAnalytics
+    analytics,
+    |setting: &Option<meilisearch_types::settings::TypoSettings>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "TypoTolerance Updated".to_string(),
+            json!({
+                "typo_tolerance": {
+                    "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
+                    "disable_on_attributes": setting
+                        .as_ref()
+                        .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
+                    "disable_on_words": setting
+                        .as_ref()
+                        .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
+                    "min_word_size_for_one_typo": setting
+                        .as_ref()
+                        .and_then(|s| s.min_word_size_for_typos
+                            .as_ref()
+                            .set()
+                            .map(|s| s.one_typo.set()))
+                        .flatten(),
+                    "min_word_size_for_two_typos": setting
+                        .as_ref()
+                        .and_then(|s| s.min_word_size_for_typos
+                            .as_ref()
+                            .set()
+                            .map(|s| s.two_typos.set()))
+                        .flatten(),
+                },
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -210,7 +283,21 @@ make_setting_route!(
     >,
     searchable_attributes,
     "searchableAttributes",
-    SearchableAttributesAnalytics
+    analytics,
+    |setting: &Option<Vec<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "SearchableAttributes Updated".to_string(),
+            json!({
+                "searchable_attributes": {
+                    "total": setting.as_ref().map(|searchable| searchable.len()),
+                    "with_wildcard": setting.as_ref().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
+                },
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -222,7 +309,20 @@ make_setting_route!(
     >,
     stop_words,
     "stopWords",
-    StopWordsAnalytics
+    analytics,
+    |stop_words: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "StopWords Updated".to_string(),
+            json!({
+                "stop_words": {
+                    "total": stop_words.as_ref().map(|stop_words| stop_words.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -234,7 +334,20 @@ make_setting_route!(
     >,
     non_separator_tokens,
     "nonSeparatorTokens",
-    NonSeparatorTokensAnalytics
+    analytics,
+    |non_separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "nonSeparatorTokens Updated".to_string(),
+            json!({
+                "non_separator_tokens": {
+                    "total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -246,7 +359,20 @@ make_setting_route!(
     >,
     separator_tokens,
     "separatorTokens",
-    SeparatorTokensAnalytics
+    analytics,
+    |separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "separatorTokens Updated".to_string(),
+            json!({
+                "separator_tokens": {
+                    "total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -258,7 +384,20 @@ make_setting_route!(
     >,
     dictionary,
     "dictionary",
-    DictionaryAnalytics
+    analytics,
+    |dictionary: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "dictionary Updated".to_string(),
+            json!({
+                "dictionary": {
+                    "total": dictionary.as_ref().map(|dictionary| dictionary.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -270,7 +409,20 @@ make_setting_route!(
     >,
     synonyms,
     "synonyms",
-    SynonymsAnalytics
+    analytics,
+    |synonyms: &Option<std::collections::BTreeMap<String, Vec<String>>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "Synonyms Updated".to_string(),
+            json!({
+                "synonyms": {
+                    "total": synonyms.as_ref().map(|synonyms| synonyms.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -282,7 +434,19 @@ make_setting_route!(
     >,
     distinct_attribute,
     "distinctAttribute",
-    DistinctAttributeAnalytics
+    analytics,
+    |distinct: &Option<String>, req: &HttpRequest| {
+        use serde_json::json;
+        analytics.publish(
+            "DistinctAttribute Updated".to_string(),
+            json!({
+                "distinct_attribute": {
+                    "set": distinct.is_some(),
+                }
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -294,7 +458,20 @@ make_setting_route!(
     >,
     proximity_precision,
     "proximityPrecision",
-    ProximityPrecisionAnalytics
+    analytics,
+    |precision: &Option<meilisearch_types::settings::ProximityPrecisionView>, req: &HttpRequest| {
+        use serde_json::json;
+        analytics.publish(
+            "ProximityPrecision Updated".to_string(),
+            json!({
+                "proximity_precision": {
+                    "set": precision.is_some(),
+                    "value": precision.unwrap_or_default(),
+                }
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -306,7 +483,17 @@ make_setting_route!(
     >,
     localized_attributes,
     "localizedAttributes",
-    LocalesAnalytics
+    analytics,
+    |rules: &Option<Vec<meilisearch_types::locales::LocalizedAttributesRuleView>>, req: &HttpRequest| {
+        use serde_json::json;
+        analytics.publish(
+            "LocalizedAttributesRules Updated".to_string(),
+            json!({
+                "locales": rules.as_ref().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<std::collections::BTreeSet<_>>())
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -318,7 +505,26 @@ make_setting_route!(
     >,
     ranking_rules,
     "rankingRules",
-    RankingRulesAnalytics
+    analytics,
+    |setting: &Option<Vec<meilisearch_types::settings::RankingRuleView>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "RankingRules Updated".to_string(),
+            json!({
+                "ranking_rules": {
+                    "words_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words))),
+                    "typo_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))),
+                    "proximity_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Proximity))),
+                    "attribute_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Attribute))),
+                    "sort_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))),
+                    "exactness_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Exactness))),
+                    "values": setting.as_ref().map(|rr| rr.iter().filter(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Asc(_) | meilisearch_types::settings::RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::<Vec<_>>().join(", ")),
+                }
+            }),
+            Some(req),
+        );
+    }
 );

 make_setting_route!(
@@ -330,7 +536,25 @@ make_setting_route!(
     >,
     faceting,
     "faceting",
-    FacetingAnalytics
+    analytics,
+    |setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
+        use serde_json::json;
+        use meilisearch_types::facet_values_sort::FacetValuesSort;
+
+        analytics.publish(
+            "Faceting Updated".to_string(),
+            json!({
+                "faceting": {
+                    "max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
+                    "sort_facet_values_by_star_count": setting.as_ref().and_then(|s| {
+                        s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
+                    }),
+                    "sort_facet_values_by_total": setting.as_ref().and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
+                },
+            }),
+            Some(req),
+        );
+    }
 );
 
 make_setting_route!(
@@ -342,7 +566,20 @@ make_setting_route!(
     >,
     pagination,
     "pagination",
-    PaginationAnalytics
+    analytics,
+    |setting: &Option<meilisearch_types::settings::PaginationSettings>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "Pagination Updated".to_string(),
+            json!({
+                "pagination": {
+                    "max_total_hits": setting.as_ref().and_then(|s| s.max_total_hits.set()),
+                },
+            }),
+            Some(req),
+        );
+    }
 );
 
 make_setting_route!(
@@ -354,8 +591,67 @@ make_setting_route!(
     >,
     embedders,
     "embedders",
-    EmbeddersAnalytics
+    analytics,
+    |setting: &Option<std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>>, req: &HttpRequest| {
+
+
+        analytics.publish(
+            "Embedders Updated".to_string(),
+            serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}),
+            Some(req),
+        );
+    }
 );
+
+fn embedder_analytics(
+    setting: Option<
+        &std::collections::BTreeMap<
+            String,
+            Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>,
+        >,
+    >,
+) -> serde_json::Value {
+    let mut sources = std::collections::HashSet::new();
+
+    if let Some(s) = &setting {
+        for source in s
+            .values()
+            .filter_map(|config| config.clone().set())
+            .filter_map(|config| config.source.set())
+        {
+            use meilisearch_types::milli::vector::settings::EmbedderSource;
+            match source {
+                EmbedderSource::OpenAi => sources.insert("openAi"),
+                EmbedderSource::HuggingFace => sources.insert("huggingFace"),
+                EmbedderSource::UserProvided => sources.insert("userProvided"),
+                EmbedderSource::Ollama => sources.insert("ollama"),
+                EmbedderSource::Rest => sources.insert("rest"),
+            };
+        }
+    };
+
+    let document_template_used = setting.as_ref().map(|map| {
+        map.values()
+            .filter_map(|config| config.clone().set())
+            .any(|config| config.document_template.set().is_some())
+    });
+
+    let document_template_max_bytes = setting.as_ref().and_then(|map| {
+        map.values()
+            .filter_map(|config| config.clone().set())
+            .filter_map(|config| config.document_template_max_bytes.set())
+            .max()
+    });
+
+    json!(
+        {
+            "total": setting.as_ref().map(|s| s.len()),
+            "sources": sources,
+            "document_template_used": document_template_used,
+            "document_template_max_bytes": document_template_max_bytes
+        }
+    )
+}
 
 make_setting_route!(
     "/search-cutoff-ms",
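Note: the `.set()` calls chained throughout these hunks come from milli's `Setting` type. A minimal, self-contained sketch of the assumed semantics (the real type lives in `meilisearch_types::milli::update`, this is an illustration only):

enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

impl<T> Setting<T> {
    // Collapse the three-state setting into an Option, as the analytics code assumes.
    fn set(self) -> Option<T> {
        match self {
            Setting::Set(value) => Some(value),
            Setting::Reset | Setting::NotSet => None,
        }
    }
}

fn main() {
    let configs = vec![Setting::Set(3usize), Setting::NotSet, Setting::Reset, Setting::Set(7)];
    // Mirrors the document_template_max_bytes computation above.
    let max = configs.into_iter().filter_map(Setting::set).max();
    assert_eq!(max, Some(7));
}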
@@ -366,7 +662,14 @@ make_setting_route!(
     >,
     search_cutoff_ms,
     "searchCutoffMs",
-    SearchCutoffMsAnalytics
+    analytics,
+    |setting: &Option<u64>, req: &HttpRequest| {
+        analytics.publish(
+            "Search Cutoff Updated".to_string(),
+            serde_json::json!({"search_cutoff_ms": setting }),
+            Some(req),
+        );
+    }
 );
 
 macro_rules! generate_configure {
@@ -410,7 +713,7 @@ pub async fn update_all(
     body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 
@@ -419,45 +722,104 @@ pub async fn update_all(
     let new_settings = validate_settings(new_settings, &index_scheduler)?;
 
     analytics.publish(
-        SettingsAnalytics {
-            ranking_rules: RankingRulesAnalytics::new(new_settings.ranking_rules.as_ref().set()),
-            searchable_attributes: SearchableAttributesAnalytics::new(
-                new_settings.searchable_attributes.as_ref().set(),
-            ),
-            displayed_attributes: DisplayedAttributesAnalytics::new(
-                new_settings.displayed_attributes.as_ref().set(),
-            ),
-            sortable_attributes: SortableAttributesAnalytics::new(
-                new_settings.sortable_attributes.as_ref().set(),
-            ),
-            filterable_attributes: FilterableAttributesAnalytics::new(
-                new_settings.filterable_attributes.as_ref().set(),
-            ),
-            distinct_attribute: DistinctAttributeAnalytics::new(
-                new_settings.distinct_attribute.as_ref().set(),
-            ),
-            proximity_precision: ProximityPrecisionAnalytics::new(
-                new_settings.proximity_precision.as_ref().set(),
-            ),
-            typo_tolerance: TypoToleranceAnalytics::new(new_settings.typo_tolerance.as_ref().set()),
-            faceting: FacetingAnalytics::new(new_settings.faceting.as_ref().set()),
-            pagination: PaginationAnalytics::new(new_settings.pagination.as_ref().set()),
-            stop_words: StopWordsAnalytics::new(new_settings.stop_words.as_ref().set()),
-            synonyms: SynonymsAnalytics::new(new_settings.synonyms.as_ref().set()),
-            embedders: EmbeddersAnalytics::new(new_settings.embedders.as_ref().set()),
-            search_cutoff_ms: SearchCutoffMsAnalytics::new(
-                new_settings.search_cutoff_ms.as_ref().set(),
-            ),
-            locales: LocalesAnalytics::new(new_settings.localized_attributes.as_ref().set()),
-            dictionary: DictionaryAnalytics::new(new_settings.dictionary.as_ref().set()),
-            separator_tokens: SeparatorTokensAnalytics::new(
-                new_settings.separator_tokens.as_ref().set(),
-            ),
-            non_separator_tokens: NonSeparatorTokensAnalytics::new(
-                new_settings.non_separator_tokens.as_ref().set(),
-            ),
-        },
-        &req,
+        "Settings Updated".to_string(),
+        json!({
+            "ranking_rules": {
+                "words_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Words))),
+                "typo_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Typo))),
+                "proximity_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Proximity))),
+                "attribute_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Attribute))),
+                "sort_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Sort))),
+                "exactness_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Exactness))),
+                "values": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().filter(|s| !matches!(s, RankingRuleView::Asc(_) | RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::<Vec<_>>().join(", ")),
+            },
+            "searchable_attributes": {
+                "total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()),
+                "with_wildcard": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
+            },
+            "displayed_attributes": {
+                "total": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.len()),
+                "with_wildcard": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
+            },
+            "sortable_attributes": {
+                "total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()),
+                "has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")),
+            },
+            "filterable_attributes": {
+                "total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()),
+                "has_geo": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")),
+            },
+            "distinct_attribute": {
+                "set": new_settings.distinct_attribute.as_ref().set().is_some()
+            },
+            "proximity_precision": {
+                "set": new_settings.proximity_precision.as_ref().set().is_some(),
+                "value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
+            },
+            "typo_tolerance": {
+                "enabled": new_settings.typo_tolerance
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.enabled.as_ref().set())
+                    .copied(),
+                "disable_on_attributes": new_settings.typo_tolerance
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
+                "disable_on_words": new_settings.typo_tolerance
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
+                "min_word_size_for_one_typo": new_settings.typo_tolerance
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.min_word_size_for_typos
+                        .as_ref()
+                        .set()
+                        .map(|s| s.one_typo.set()))
+                    .flatten(),
+                "min_word_size_for_two_typos": new_settings.typo_tolerance
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.min_word_size_for_typos
+                        .as_ref()
+                        .set()
+                        .map(|s| s.two_typos.set()))
+                    .flatten(),
+            },
+            "faceting": {
+                "max_values_per_facet": new_settings.faceting
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.max_values_per_facet.as_ref().set()),
+                "sort_facet_values_by_star_count": new_settings.faceting
+                    .as_ref()
+                    .set()
+                    .and_then(|s| {
+                        s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
+                    }),
+                "sort_facet_values_by_total": new_settings.faceting
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
+            },
+            "pagination": {
+                "max_total_hits": new_settings.pagination
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.max_total_hits.as_ref().set()),
+            },
+            "stop_words": {
+                "total": new_settings.stop_words.as_ref().set().map(|stop_words| stop_words.len()),
+            },
+            "synonyms": {
+                "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
+            },
+            "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()),
+            "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(),
+            "locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<std::collections::BTreeSet<_>>()),
+        }),
+        Some(&req),
     );
 
     let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
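Note: the hunk above swaps between two publish shapes — v1.11.3 hands a typed aggregate to `analytics.publish(aggregate, &req)`, while this branch goes back to an event name plus a raw JSON payload on a trait object. A minimal sketch of the older shape (an assumption for illustration, not the crate's actual definitions; the request parameter is dropped to keep it self-contained):

use serde_json::{json, Value};

trait Analytics {
    fn publish(&self, event_name: String, send: Value);
}

struct NoopAnalytics;

impl Analytics for NoopAnalytics {
    fn publish(&self, event_name: String, send: Value) {
        println!("{event_name}: {send}");
    }
}

fn main() {
    // Dynamic dispatch, matching the `web::Data<dyn Analytics>` style above.
    let analytics: Box<dyn Analytics> = Box::new(NoopAnalytics);
    analytics.publish("Settings Updated".to_string(), json!({ "distinct_attribute": { "set": true } }));
}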
@@ -1,621 +0,0 @@
-//! All the structures used to make the analytics on the settings works.
-//! The signatures of the `new` functions are not very rust idiomatic because they must match the types received
-//! through the sub-settings route directly without any manipulation.
-//! This is why we often use a `Option<&Vec<_>>` instead of a `Option<&[_]>`.
-
-use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
-use meilisearch_types::milli::update::Setting;
-use meilisearch_types::milli::vector::settings::EmbeddingSettings;
-use meilisearch_types::settings::{
-    FacetingSettings, PaginationSettings, ProximityPrecisionView, TypoSettings,
-};
-use meilisearch_types::{facet_values_sort::FacetValuesSort, settings::RankingRuleView};
-use serde::Serialize;
-use std::collections::{BTreeMap, BTreeSet, HashSet};
-
-use crate::analytics::Aggregate;
-
-#[derive(Serialize, Default)]
-pub struct SettingsAnalytics {
-    pub ranking_rules: RankingRulesAnalytics,
-    pub searchable_attributes: SearchableAttributesAnalytics,
-    pub displayed_attributes: DisplayedAttributesAnalytics,
-    pub sortable_attributes: SortableAttributesAnalytics,
-    pub filterable_attributes: FilterableAttributesAnalytics,
-    pub distinct_attribute: DistinctAttributeAnalytics,
-    pub proximity_precision: ProximityPrecisionAnalytics,
-    pub typo_tolerance: TypoToleranceAnalytics,
-    pub faceting: FacetingAnalytics,
-    pub pagination: PaginationAnalytics,
-    pub stop_words: StopWordsAnalytics,
-    pub synonyms: SynonymsAnalytics,
-    pub embedders: EmbeddersAnalytics,
-    pub search_cutoff_ms: SearchCutoffMsAnalytics,
-    pub locales: LocalesAnalytics,
-    pub dictionary: DictionaryAnalytics,
-    pub separator_tokens: SeparatorTokensAnalytics,
-    pub non_separator_tokens: NonSeparatorTokensAnalytics,
-}
-
-impl Aggregate for SettingsAnalytics {
-    fn event_name(&self) -> &'static str {
-        "Settings Updated"
-    }
-
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        Box::new(Self {
-            ranking_rules: RankingRulesAnalytics {
-                words_position: new
-                    .ranking_rules
-                    .words_position
-                    .or(self.ranking_rules.words_position),
-                typo_position: new.ranking_rules.typo_position.or(self.ranking_rules.typo_position),
-                proximity_position: new
-                    .ranking_rules
-                    .proximity_position
-                    .or(self.ranking_rules.proximity_position),
-                attribute_position: new
-                    .ranking_rules
-                    .attribute_position
-                    .or(self.ranking_rules.attribute_position),
-                sort_position: new.ranking_rules.sort_position.or(self.ranking_rules.sort_position),
-                exactness_position: new
-                    .ranking_rules
-                    .exactness_position
-                    .or(self.ranking_rules.exactness_position),
-                values: new.ranking_rules.values.or(self.ranking_rules.values),
-            },
-            searchable_attributes: SearchableAttributesAnalytics {
-                total: new.searchable_attributes.total.or(self.searchable_attributes.total),
-                with_wildcard: new
-                    .searchable_attributes
-                    .with_wildcard
-                    .or(self.searchable_attributes.with_wildcard),
-            },
-            displayed_attributes: DisplayedAttributesAnalytics {
-                total: new.displayed_attributes.total.or(self.displayed_attributes.total),
-                with_wildcard: new
-                    .displayed_attributes
-                    .with_wildcard
-                    .or(self.displayed_attributes.with_wildcard),
-            },
-            sortable_attributes: SortableAttributesAnalytics {
-                total: new.sortable_attributes.total.or(self.sortable_attributes.total),
-                has_geo: new.sortable_attributes.has_geo.or(self.sortable_attributes.has_geo),
-            },
-            filterable_attributes: FilterableAttributesAnalytics {
-                total: new.filterable_attributes.total.or(self.filterable_attributes.total),
-                has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo),
-            },
-            distinct_attribute: DistinctAttributeAnalytics {
-                set: self.distinct_attribute.set | new.distinct_attribute.set,
-            },
-            proximity_precision: ProximityPrecisionAnalytics {
-                set: self.proximity_precision.set | new.proximity_precision.set,
-                value: new.proximity_precision.value.or(self.proximity_precision.value),
-            },
-            typo_tolerance: TypoToleranceAnalytics {
-                enabled: new.typo_tolerance.enabled.or(self.typo_tolerance.enabled),
-                disable_on_attributes: new
-                    .typo_tolerance
-                    .disable_on_attributes
-                    .or(self.typo_tolerance.disable_on_attributes),
-                disable_on_words: new
-                    .typo_tolerance
-                    .disable_on_words
-                    .or(self.typo_tolerance.disable_on_words),
-                min_word_size_for_one_typo: new
-                    .typo_tolerance
-                    .min_word_size_for_one_typo
-                    .or(self.typo_tolerance.min_word_size_for_one_typo),
-                min_word_size_for_two_typos: new
-                    .typo_tolerance
-                    .min_word_size_for_two_typos
-                    .or(self.typo_tolerance.min_word_size_for_two_typos),
-            },
-            faceting: FacetingAnalytics {
-                max_values_per_facet: new
-                    .faceting
-                    .max_values_per_facet
-                    .or(self.faceting.max_values_per_facet),
-                sort_facet_values_by_star_count: new
-                    .faceting
-                    .sort_facet_values_by_star_count
-                    .or(self.faceting.sort_facet_values_by_star_count),
-                sort_facet_values_by_total: new
-                    .faceting
-                    .sort_facet_values_by_total
-                    .or(self.faceting.sort_facet_values_by_total),
-            },
-            pagination: PaginationAnalytics {
-                max_total_hits: new.pagination.max_total_hits.or(self.pagination.max_total_hits),
-            },
-            stop_words: StopWordsAnalytics {
-                total: new.stop_words.total.or(self.stop_words.total),
-            },
-            synonyms: SynonymsAnalytics { total: new.synonyms.total.or(self.synonyms.total) },
-            embedders: EmbeddersAnalytics {
-                total: new.embedders.total.or(self.embedders.total),
-                sources: match (self.embedders.sources, new.embedders.sources) {
-                    (None, None) => None,
-                    (Some(sources), None) | (None, Some(sources)) => Some(sources),
-                    (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()),
-                },
-                document_template_used: match (
-                    self.embedders.document_template_used,
-                    new.embedders.document_template_used,
-                ) {
-                    (None, None) => None,
-                    (Some(used), None) | (None, Some(used)) => Some(used),
-                    (Some(this), Some(other)) => Some(this | other),
-                },
-                document_template_max_bytes: match (
-                    self.embedders.document_template_max_bytes,
-                    new.embedders.document_template_max_bytes,
-                ) {
-                    (None, None) => None,
-                    (Some(bytes), None) | (None, Some(bytes)) => Some(bytes),
-                    (Some(this), Some(other)) => Some(this.max(other)),
-                },
-                binary_quantization_used: match (
-                    self.embedders.binary_quantization_used,
-                    new.embedders.binary_quantization_used,
-                ) {
-                    (None, None) => None,
-                    (Some(bq), None) | (None, Some(bq)) => Some(bq),
-                    (Some(this), Some(other)) => Some(this | other),
-                },
-            },
-            search_cutoff_ms: SearchCutoffMsAnalytics {
-                search_cutoff_ms: new
-                    .search_cutoff_ms
-                    .search_cutoff_ms
-                    .or(self.search_cutoff_ms.search_cutoff_ms),
-            },
-            locales: LocalesAnalytics { locales: new.locales.locales.or(self.locales.locales) },
-            dictionary: DictionaryAnalytics {
-                total: new.dictionary.total.or(self.dictionary.total),
-            },
-            separator_tokens: SeparatorTokensAnalytics {
-                total: new.non_separator_tokens.total.or(self.separator_tokens.total),
-            },
-            non_separator_tokens: NonSeparatorTokensAnalytics {
-                total: new.non_separator_tokens.total.or(self.non_separator_tokens.total),
-            },
-        })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        serde_json::to_value(*self).unwrap_or_default()
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct RankingRulesAnalytics {
-    pub words_position: Option<usize>,
-    pub typo_position: Option<usize>,
-    pub proximity_position: Option<usize>,
-    pub attribute_position: Option<usize>,
-    pub sort_position: Option<usize>,
-    pub exactness_position: Option<usize>,
-    pub values: Option<String>,
-}
-
-impl RankingRulesAnalytics {
-    pub fn new(rr: Option<&Vec<RankingRuleView>>) -> Self {
-        RankingRulesAnalytics {
-            words_position: rr.as_ref().and_then(|rr| {
-                rr.iter()
-                    .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words))
-            }),
-            typo_position: rr.as_ref().and_then(|rr| {
-                rr.iter()
-                    .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))
-            }),
-            proximity_position: rr.as_ref().and_then(|rr| {
-                rr.iter().position(|s| {
-                    matches!(s, meilisearch_types::settings::RankingRuleView::Proximity)
-                })
-            }),
-            attribute_position: rr.as_ref().and_then(|rr| {
-                rr.iter().position(|s| {
-                    matches!(s, meilisearch_types::settings::RankingRuleView::Attribute)
-                })
-            }),
-            sort_position: rr.as_ref().and_then(|rr| {
-                rr.iter()
-                    .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))
-            }),
-            exactness_position: rr.as_ref().and_then(|rr| {
-                rr.iter().position(|s| {
-                    matches!(s, meilisearch_types::settings::RankingRuleView::Exactness)
-                })
-            }),
-            values: rr.as_ref().map(|rr| {
-                rr.iter()
-                    .filter(|s| {
-                        matches!(
-                            s,
-                            meilisearch_types::settings::RankingRuleView::Asc(_)
-                                | meilisearch_types::settings::RankingRuleView::Desc(_)
-                        )
-                    })
-                    .map(|x| x.to_string())
-                    .collect::<Vec<_>>()
-                    .join(", ")
-            }),
-        }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { ranking_rules: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct SearchableAttributesAnalytics {
-    pub total: Option<usize>,
-    pub with_wildcard: Option<bool>,
-}
-
-impl SearchableAttributesAnalytics {
-    pub fn new(setting: Option<&Vec<String>>) -> Self {
-        Self {
-            total: setting.as_ref().map(|searchable| searchable.len()),
-            with_wildcard: setting
-                .as_ref()
-                .map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
-        }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { searchable_attributes: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct DisplayedAttributesAnalytics {
-    pub total: Option<usize>,
-    pub with_wildcard: Option<bool>,
-}
-
-impl DisplayedAttributesAnalytics {
-    pub fn new(displayed: Option<&Vec<String>>) -> Self {
-        Self {
-            total: displayed.as_ref().map(|displayed| displayed.len()),
-            with_wildcard: displayed
-                .as_ref()
-                .map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
-        }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { displayed_attributes: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct SortableAttributesAnalytics {
-    pub total: Option<usize>,
-    pub has_geo: Option<bool>,
-}
-
-impl SortableAttributesAnalytics {
-    pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
-        Self {
-            total: setting.as_ref().map(|sort| sort.len()),
-            has_geo: setting.as_ref().map(|sort| sort.contains("_geo")),
-        }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { sortable_attributes: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct FilterableAttributesAnalytics {
-    pub total: Option<usize>,
-    pub has_geo: Option<bool>,
-}
-
-impl FilterableAttributesAnalytics {
-    pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
-        Self {
-            total: setting.as_ref().map(|filter| filter.len()),
-            has_geo: setting.as_ref().map(|filter| filter.contains("_geo")),
-        }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { filterable_attributes: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct DistinctAttributeAnalytics {
-    pub set: bool,
-}
-
-impl DistinctAttributeAnalytics {
-    pub fn new(distinct: Option<&String>) -> Self {
-        Self { set: distinct.is_some() }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { distinct_attribute: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct ProximityPrecisionAnalytics {
-    pub set: bool,
-    pub value: Option<ProximityPrecisionView>,
-}
-
-impl ProximityPrecisionAnalytics {
-    pub fn new(precision: Option<&ProximityPrecisionView>) -> Self {
-        Self { set: precision.is_some(), value: precision.cloned() }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { proximity_precision: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct TypoToleranceAnalytics {
-    pub enabled: Option<bool>,
-    pub disable_on_attributes: Option<bool>,
-    pub disable_on_words: Option<bool>,
-    pub min_word_size_for_one_typo: Option<u8>,
-    pub min_word_size_for_two_typos: Option<u8>,
-}
-
-impl TypoToleranceAnalytics {
-    pub fn new(setting: Option<&TypoSettings>) -> Self {
-        Self {
-            enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
-            disable_on_attributes: setting
-                .as_ref()
-                .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
-            disable_on_words: setting
-                .as_ref()
-                .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
-            min_word_size_for_one_typo: setting
-                .as_ref()
-                .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set()))
-                .flatten(),
-            min_word_size_for_two_typos: setting
-                .as_ref()
-                .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set()))
-                .flatten(),
-        }
-    }
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { typo_tolerance: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct FacetingAnalytics {
-    pub max_values_per_facet: Option<usize>,
-    pub sort_facet_values_by_star_count: Option<bool>,
-    pub sort_facet_values_by_total: Option<usize>,
-}
-
-impl FacetingAnalytics {
-    pub fn new(setting: Option<&FacetingSettings>) -> Self {
-        Self {
-            max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
-            sort_facet_values_by_star_count: setting.as_ref().and_then(|s| {
-                s.sort_facet_values_by
-                    .as_ref()
-                    .set()
-                    .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
-            }),
-            sort_facet_values_by_total: setting
-                .as_ref()
-                .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
-        }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { faceting: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct PaginationAnalytics {
-    pub max_total_hits: Option<usize>,
-}
-
-impl PaginationAnalytics {
-    pub fn new(setting: Option<&PaginationSettings>) -> Self {
-        Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { pagination: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct StopWordsAnalytics {
-    pub total: Option<usize>,
-}
-
-impl StopWordsAnalytics {
-    pub fn new(stop_words: Option<&BTreeSet<String>>) -> Self {
-        Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { stop_words: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct SynonymsAnalytics {
-    pub total: Option<usize>,
-}
-
-impl SynonymsAnalytics {
-    pub fn new(synonyms: Option<&BTreeMap<String, Vec<String>>>) -> Self {
-        Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { synonyms: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct EmbeddersAnalytics {
-    // last
-    pub total: Option<usize>,
-    // Merge the sources
-    pub sources: Option<HashSet<String>>,
-    // |=
-    pub document_template_used: Option<bool>,
-    // max
-    pub document_template_max_bytes: Option<usize>,
-    // |=
-    pub binary_quantization_used: Option<bool>,
-}
-
-impl EmbeddersAnalytics {
-    pub fn new(setting: Option<&BTreeMap<String, Setting<EmbeddingSettings>>>) -> Self {
-        let mut sources = std::collections::HashSet::new();
-
-        if let Some(s) = &setting {
-            for source in s
-                .values()
-                .filter_map(|config| config.clone().set())
-                .filter_map(|config| config.source.set())
-            {
-                use meilisearch_types::milli::vector::settings::EmbedderSource;
-                match source {
-                    EmbedderSource::OpenAi => sources.insert("openAi".to_string()),
-                    EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()),
-                    EmbedderSource::UserProvided => sources.insert("userProvided".to_string()),
-                    EmbedderSource::Ollama => sources.insert("ollama".to_string()),
-                    EmbedderSource::Rest => sources.insert("rest".to_string()),
-                };
-            }
-        };
-
-        Self {
-            total: setting.as_ref().map(|s| s.len()),
-            sources: Some(sources),
-            document_template_used: setting.as_ref().map(|map| {
-                map.values()
-                    .filter_map(|config| config.clone().set())
-                    .any(|config| config.document_template.set().is_some())
-            }),
-            document_template_max_bytes: setting.as_ref().and_then(|map| {
-                map.values()
-                    .filter_map(|config| config.clone().set())
-                    .filter_map(|config| config.document_template_max_bytes.set())
-                    .max()
-            }),
-            binary_quantization_used: setting.as_ref().map(|map| {
-                map.values()
-                    .filter_map(|config| config.clone().set())
-                    .any(|config| config.binary_quantized.set().is_some())
-            }),
-        }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { embedders: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-#[serde(transparent)]
-pub struct SearchCutoffMsAnalytics {
-    pub search_cutoff_ms: Option<u64>,
-}
-
-impl SearchCutoffMsAnalytics {
-    pub fn new(setting: Option<&u64>) -> Self {
-        Self { search_cutoff_ms: setting.copied() }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { search_cutoff_ms: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-#[serde(transparent)]
-pub struct LocalesAnalytics {
-    pub locales: Option<BTreeSet<Locale>>,
-}
-
-impl LocalesAnalytics {
-    pub fn new(rules: Option<&Vec<LocalizedAttributesRuleView>>) -> Self {
-        LocalesAnalytics {
-            locales: rules.as_ref().map(|rules| {
-                rules
-                    .iter()
-                    .flat_map(|rule| rule.locales.iter().cloned())
-                    .collect::<std::collections::BTreeSet<_>>()
-            }),
-        }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { locales: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct DictionaryAnalytics {
-    pub total: Option<usize>,
-}
-
-impl DictionaryAnalytics {
-    pub fn new(dictionary: Option<&BTreeSet<String>>) -> Self {
-        Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { dictionary: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct SeparatorTokensAnalytics {
-    pub total: Option<usize>,
-}
-
-impl SeparatorTokensAnalytics {
-    pub fn new(separator_tokens: Option<&BTreeSet<String>>) -> Self {
-        Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { separator_tokens: self, ..Default::default() }
-    }
-}
-
-#[derive(Serialize, Default)]
-pub struct NonSeparatorTokensAnalytics {
-    pub total: Option<usize>,
-}
-
-impl NonSeparatorTokensAnalytics {
-    pub fn new(non_separator_tokens: Option<&BTreeSet<String>>) -> Self {
-        Self {
-            total: non_separator_tokens
-                .as_ref()
-                .map(|non_separator_tokens| non_separator_tokens.len()),
-        }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { non_separator_tokens: self, ..Default::default() }
-    }
-}
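Note: the entire `aggregate` implementation in the file deleted above reduces to one rule per field — keep the newer observation if present, else the older one. (Also worth flagging for review: the `separator_tokens` arm reads `new.non_separator_tokens.total` rather than `new.separator_tokens.total`.) A minimal sketch of that merge rule:

fn merge<T>(old: Option<T>, new: Option<T>) -> Option<T> {
    new.or(old) // matches the `new.field.or(self.field)` pattern used per field
}

fn main() {
    assert_eq!(merge(Some(1), None), Some(1));
    assert_eq!(merge(Some(1), Some(2)), Some(2));
    assert_eq!(merge::<u8>(None, None), None);
}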
@@ -13,10 +13,9 @@ use serde_json::Value;
 use tracing::debug;
 
 use super::ActionPolicy;
-use crate::analytics::Analytics;
+use crate::analytics::{Analytics, SimilarAggregator};
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
-use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST};
 use crate::search::{
     add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
     SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
@@ -35,13 +34,13 @@ pub async fn similar_get(
     index_uid: web::Path<String>,
     params: AwebQueryParameter<SimilarQueryGet, DeserrQueryParamError>,
     req: HttpRequest,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 
     let query = params.0.try_into()?;
 
-    let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query);
+    let mut aggregate = SimilarAggregator::from_query(&query, &req);
 
     debug!(parameters = ?query, "Similar get");
 
@@ -50,7 +49,7 @@ pub async fn similar_get(
     if let Ok(similar) = &similar {
         aggregate.succeed(similar);
     }
-    analytics.publish(aggregate, &req);
+    analytics.get_similar(aggregate);
 
     let similar = similar?;
 
@@ -63,21 +62,21 @@ pub async fn similar_post(
     index_uid: web::Path<String>,
     params: AwebJson<SimilarQuery, DeserrJsonError>,
     req: HttpRequest,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 
     let query = params.into_inner();
     debug!(parameters = ?query, "Similar post");
 
-    let mut aggregate = SimilarAggregator::<SimilarPOST>::from_query(&query);
+    let mut aggregate = SimilarAggregator::from_query(&query, &req);
 
     let similar = similar(index_scheduler, index_uid, query).await;
 
     if let Ok(similar) = &similar {
         aggregate.succeed(similar);
     }
-    analytics.publish(aggregate, &req);
+    analytics.post_similar(aggregate);
 
     let similar = similar?;
 
@@ -103,8 +102,8 @@ async fn similar(
 
     let index = index_scheduler.index(&index_uid)?;
 
-    let (embedder_name, embedder, quantized) =
-        SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
+    let (embedder_name, embedder) =
+        SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
 
     tokio::task::spawn_blocking(move || {
         perform_similar(
@@ -112,7 +111,6 @@ async fn similar(
             query,
             embedder_name,
             embedder,
-            quantized,
             retrieve_vectors,
             index_scheduler.features(),
         )
@@ -141,8 +139,8 @@ pub struct SimilarQueryGet {
     show_ranking_score_details: Param<bool>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
     pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
-    #[deserr(error = DeserrQueryParamError<InvalidEmbedder>)]
-    pub embedder: String,
+    #[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
+    pub embedder: Option<String>,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
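Note: with `embedder` now optional and defaulted, the handler above bridges it to the older `SearchKind::embedder` call via `as_deref`, which borrows `Option<String>` as `Option<&str>` without cloning. A minimal illustration:

fn main() {
    let embedder: Option<String> = Some("default".to_string());
    let as_str: Option<&str> = embedder.as_deref();
    assert_eq!(as_str, Some("default"));
    assert_eq!(None::<String>.as_deref(), None);
}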
@@ -1,235 +0,0 @@
-use std::collections::{BinaryHeap, HashMap};
-
-use once_cell::sync::Lazy;
-use regex::Regex;
-use serde_json::{json, Value};
-
-use crate::{
-    aggregate_methods,
-    analytics::{Aggregate, AggregateMethod},
-    search::{SimilarQuery, SimilarResult},
-};
-
-aggregate_methods!(
-    SimilarPOST => "Similar POST",
-    SimilarGET => "Similar GET",
-);
-
-#[derive(Default)]
-pub struct SimilarAggregator<Method: AggregateMethod> {
-    // requests
-    total_received: usize,
-    total_succeeded: usize,
-    time_spent: BinaryHeap<usize>,
-
-    // filter
-    filter_with_geo_radius: bool,
-    filter_with_geo_bounding_box: bool,
-    // every time a request has a filter, this field must be incremented by the number of terms it contains
-    filter_sum_of_criteria_terms: usize,
-    // every time a request has a filter, this field must be incremented by one
-    filter_total_number_of_criteria: usize,
-    used_syntax: HashMap<String, usize>,
-
-    // Whether a non-default embedder was specified
-    retrieve_vectors: bool,
-
-    // pagination
-    max_limit: usize,
-    max_offset: usize,
-
-    // formatting
-    max_attributes_to_retrieve: usize,
-
-    // scoring
-    show_ranking_score: bool,
-    show_ranking_score_details: bool,
-    ranking_score_threshold: bool,
-
-    marker: std::marker::PhantomData<Method>,
-}
-
-impl<Method: AggregateMethod> SimilarAggregator<Method> {
-    #[allow(clippy::field_reassign_with_default)]
-    pub fn from_query(query: &SimilarQuery) -> Self {
-        let SimilarQuery {
-            id: _,
-            embedder: _,
-            offset,
-            limit,
-            attributes_to_retrieve: _,
-            retrieve_vectors,
-            show_ranking_score,
-            show_ranking_score_details,
-            filter,
-            ranking_score_threshold,
-        } = query;
-
-        let mut ret = Self::default();
-
-        ret.total_received = 1;
-
-        if let Some(ref filter) = filter {
-            static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
-            ret.filter_total_number_of_criteria = 1;
-
-            let syntax = match filter {
-                Value::String(_) => "string".to_string(),
-                Value::Array(values) => {
-                    if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
-                        "mixed".to_string()
-                    } else {
-                        "array".to_string()
-                    }
-                }
-                _ => "none".to_string(),
-            };
-            // convert the string to a HashMap
-            ret.used_syntax.insert(syntax, 1);
-
-            let stringified_filters = filter.to_string();
-            ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
-            ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
-            ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
-        }
-
-        ret.max_limit = *limit;
-        ret.max_offset = *offset;
-
-        ret.show_ranking_score = *show_ranking_score;
-        ret.show_ranking_score_details = *show_ranking_score_details;
-        ret.ranking_score_threshold = ranking_score_threshold.is_some();
-
-        ret.retrieve_vectors = *retrieve_vectors;
-
-        ret
-    }
-
-    pub fn succeed(&mut self, result: &SimilarResult) {
-        let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result;
-
-        self.total_succeeded = self.total_succeeded.saturating_add(1);
-
-        self.time_spent.push(*processing_time_ms as usize);
-    }
-}
-
-impl<Method: AggregateMethod> Aggregate for SimilarAggregator<Method> {
-    fn event_name(&self) -> &'static str {
-        Method::event_name()
-    }
-
-    /// Aggregate one [SimilarAggregator] into another.
-    fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        let Self {
-            total_received,
-            total_succeeded,
-            mut time_spent,
-            filter_with_geo_radius,
-            filter_with_geo_bounding_box,
-            filter_sum_of_criteria_terms,
-            filter_total_number_of_criteria,
-            used_syntax,
-            max_limit,
-            max_offset,
-            max_attributes_to_retrieve,
-            show_ranking_score,
-            show_ranking_score_details,
-            ranking_score_threshold,
-            retrieve_vectors,
-            marker: _,
-        } = *new;
-
-        // request
-        self.total_received = self.total_received.saturating_add(total_received);
-        self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
-        self.time_spent.append(&mut time_spent);
-
-        // filter
-        self.filter_with_geo_radius |= filter_with_geo_radius;
-        self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
-        self.filter_sum_of_criteria_terms =
-            self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
-        self.filter_total_number_of_criteria =
-            self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
-        for (key, value) in used_syntax.into_iter() {
-            let used_syntax = self.used_syntax.entry(key).or_insert(0);
-            *used_syntax = used_syntax.saturating_add(value);
-        }
-
-        self.retrieve_vectors |= retrieve_vectors;
-
-        // pagination
-        self.max_limit = self.max_limit.max(max_limit);
-        self.max_offset = self.max_offset.max(max_offset);
-
-        // formatting
-        self.max_attributes_to_retrieve =
-            self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
-
-        // scoring
-        self.show_ranking_score |= show_ranking_score;
-        self.show_ranking_score_details |= show_ranking_score_details;
-        self.ranking_score_threshold |= ranking_score_threshold;
-
-        self
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        let Self {
-            total_received,
-            total_succeeded,
-            time_spent,
-            filter_with_geo_radius,
-            filter_with_geo_bounding_box,
-            filter_sum_of_criteria_terms,
-            filter_total_number_of_criteria,
-            used_syntax,
-            max_limit,
-            max_offset,
-            max_attributes_to_retrieve,
-            show_ranking_score,
-            show_ranking_score_details,
-            ranking_score_threshold,
-            retrieve_vectors,
-            marker: _,
-        } = *self;
-
-        // we get all the values in a sorted manner
-        let time_spent = time_spent.into_sorted_vec();
-        // the index of the 99th percentage of value
-        let percentile_99th = time_spent.len() * 99 / 100;
-        // We are only interested by the slowest value of the 99th fastest results
-        let time_spent = time_spent.get(percentile_99th);
-
-        json!({
-            "requests": {
-                "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
-                "total_succeeded": total_succeeded,
-                "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
-                "total_received": total_received,
-            },
-            "filter": {
-                "with_geoRadius": filter_with_geo_radius,
-                "with_geoBoundingBox": filter_with_geo_bounding_box,
-                "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
-                "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
-            },
-            "vector": {
-                "retrieve_vectors": retrieve_vectors,
-            },
-            "pagination": {
-                "max_limit": max_limit,
-                "max_offset": max_offset,
-            },
-            "formatting": {
-                "max_attributes_to_retrieve": max_attributes_to_retrieve,
-            },
-            "scoring": {
-                "show_ranking_score": show_ranking_score,
-                "show_ranking_score_details": show_ranking_score_details,
-                "ranking_score_threshold": ranking_score_threshold,
-            }
-        })
-    }
-}
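Note: the `into_event` body deleted above derives the reported 99th-percentile response time by indexing into the sorted timings at `len * 99 / 100`. A worked example of that computation:

fn main() {
    // 200 sorted timings: index 200 * 99 / 100 = 198, so the 199th fastest
    // value is what gets reported as the 99th-percentile response time.
    let timings: Vec<usize> = (1..=200).collect();
    let idx = timings.len() * 99 / 100;
    assert_eq!(timings.get(idx), Some(&199));
    // Integer division means tiny samples report their fastest value.
    let small = vec![12usize];
    assert_eq!(small.get(small.len() * 99 / 100), Some(&12));
}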
@@ -25,7 +25,6 @@ pub mod indexes;
 mod logs;
 mod metrics;
 mod multi_search;
-mod multi_search_analytics;
 mod snapshot;
 mod swap_indexes;
 pub mod tasks;
@@ -9,7 +9,7 @@ use meilisearch_types::keys::actions;
 use serde::Serialize;
 use tracing::debug;
 
-use crate::analytics::Analytics;
+use crate::analytics::{Analytics, MultiSearchAggregator};
 use crate::error::MeilisearchHttpError;
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::{AuthenticationError, GuardedData};
@@ -21,8 +21,6 @@ use crate::search::{
 };
 use crate::search_queue::SearchQueue;
 
-use super::multi_search_analytics::MultiSearchAggregator;
-
 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post))));
 }
@@ -37,7 +35,7 @@ pub async fn multi_search_with_post(
     search_queue: Data<SearchQueue>,
     params: AwebJson<FederatedSearch, DeserrJsonError>,
     req: HttpRequest,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     // Since we don't want to process half of the search requests and then get a permit refused
     // we're going to get one permit for the whole duration of the multi-search request.
@@ -45,7 +43,7 @@ pub async fn multi_search_with_post(
 
     let federated_search = params.into_inner();
 
-    let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search);
+    let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req);
 
     let FederatedSearch { mut queries, federation } = federated_search;
 
@@ -89,7 +87,7 @@ pub async fn multi_search_with_post(
                 multi_aggregate.succeed();
             }
 
-            analytics.publish(multi_aggregate, &req);
+            analytics.post_multi_search(multi_aggregate);
             HttpResponse::Ok().json(search_result??)
         }
         None => {
@@ -151,7 +149,7 @@ pub async fn multi_search_with_post(
             if search_results.is_ok() {
                 multi_aggregate.succeed();
             }
-            analytics.publish(multi_aggregate, &req);
+            analytics.post_multi_search(multi_aggregate);
 
             let search_results = search_results.map_err(|(mut err, query_index)| {
                 // Add the query index that failed as context for the error message.
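Note: the aggregators in this diff (similar, multi-search, and the file below) consistently count with `saturating_add`, so merging telemetry can never overflow and panic in debug builds. A tiny demonstration of that choice:

fn main() {
    let near_max: usize = usize::MAX - 1;
    assert_eq!(near_max.saturating_add(5), usize::MAX); // clamps instead of panicking
    assert_eq!(3usize.saturating_add(5), 8);
}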
@@ -1,170 +0,0 @@
-use std::collections::HashSet;
-
-use serde_json::json;
-
-use crate::{
-    analytics::Aggregate,
-    search::{FederatedSearch, SearchQueryWithIndex},
-};
-
-#[derive(Default)]
-pub struct MultiSearchAggregator {
-    // requests
-    total_received: usize,
-    total_succeeded: usize,
-
-    // sum of the number of distinct indexes in each single request, use with total_received to compute an avg
-    total_distinct_index_count: usize,
-    // number of queries with a single index, use with total_received to compute a proportion
-    total_single_index: usize,
-
-    // sum of the number of search queries in the requests, use with total_received to compute an average
-    total_search_count: usize,
-
-    // scoring
-    show_ranking_score: bool,
-    show_ranking_score_details: bool,
-
-    // federation
-    use_federation: bool,
-}
-
-impl MultiSearchAggregator {
-    pub fn from_federated_search(federated_search: &FederatedSearch) -> Self {
-        let use_federation = federated_search.federation.is_some();
-
-        let distinct_indexes: HashSet<_> = federated_search
-            .queries
-            .iter()
-            .map(|query| {
-                let query = &query;
-                // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
-                let SearchQueryWithIndex {
-                    index_uid,
-                    federation_options: _,
-                    q: _,
-                    vector: _,
-                    offset: _,
-                    limit: _,
-                    page: _,
-                    hits_per_page: _,
-                    attributes_to_retrieve: _,
-                    retrieve_vectors: _,
-                    attributes_to_crop: _,
-                    crop_length: _,
-                    attributes_to_highlight: _,
-                    show_ranking_score: _,
-                    show_ranking_score_details: _,
-                    show_matches_position: _,
-                    filter: _,
-                    sort: _,
-                    distinct: _,
-                    facets: _,
-                    highlight_pre_tag: _,
-                    highlight_post_tag: _,
-                    crop_marker: _,
-                    matching_strategy: _,
-                    attributes_to_search_on: _,
-                    hybrid: _,
-                    ranking_score_threshold: _,
-                    locales: _,
-                } = query;
-
-                index_uid.as_str()
-            })
-            .collect();
-
-        let show_ranking_score =
-            federated_search.queries.iter().any(|query| query.show_ranking_score);
-        let show_ranking_score_details =
-            federated_search.queries.iter().any(|query| query.show_ranking_score_details);
-
-        Self {
-            total_received: 1,
-            total_succeeded: 0,
-            total_distinct_index_count: distinct_indexes.len(),
-            total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
-            total_search_count: federated_search.queries.len(),
-            show_ranking_score,
-            show_ranking_score_details,
-            use_federation,
-        }
-    }
-
-    pub fn succeed(&mut self) {
-        self.total_succeeded = self.total_succeeded.saturating_add(1);
-    }
-}
-
-impl Aggregate for MultiSearchAggregator {
-    fn event_name(&self) -> &'static str {
-        "Documents Searched by Multi-Search POST"
-    }
-
-    /// Aggregate one [MultiSearchAggregator] into another.
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        // write the aggregate in a way that will cause a compilation error if a field is added.
-
-        // get ownership of self, replacing it by a default value.
-        let this = *self;
-
-        let total_received = this.total_received.saturating_add(new.total_received);
-        let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded);
-        let total_distinct_index_count =
-            this.total_distinct_index_count.saturating_add(new.total_distinct_index_count);
-        let total_single_index = this.total_single_index.saturating_add(new.total_single_index);
-        let total_search_count = this.total_search_count.saturating_add(new.total_search_count);
-        let show_ranking_score = this.show_ranking_score || new.show_ranking_score;
-        let show_ranking_score_details =
-            this.show_ranking_score_details || new.show_ranking_score_details;
-        let use_federation = this.use_federation || new.use_federation;
-
-        Box::new(Self {
-            total_received,
-            total_succeeded,
-            total_distinct_index_count,
-            total_single_index,
-            total_search_count,
-            show_ranking_score,
-            show_ranking_score_details,
-            use_federation,
-        })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        let Self {
-            total_received,
-            total_succeeded,
-            total_distinct_index_count,
-            total_single_index,
-            total_search_count,
-            show_ranking_score,
-            show_ranking_score_details,
-            use_federation,
-        } = *self;
-
-        json!({
-            "requests": {
-                "total_succeeded": total_succeeded,
-                "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
-                "total_received": total_received,
-            },
-            "indexes": {
-                "total_single_index": total_single_index,
-                "total_distinct_index_count": total_distinct_index_count,
-                "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early
-            },
-            "searches": {
-                "total_search_count": total_search_count,
-                "avg_search_count": (total_search_count as f64) / (total_received as f64),
-            },
-            "scoring": {
-                "show_ranking_score": show_ranking_score,
-                "show_ranking_score_details": show_ranking_score_details,
-            },
-            "federation": {
-                "use_federation": use_federation,
-            }
-        })
-    }
-}
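The deleted file above is a full example of the `Aggregate` contract: each request builds one event, identical events are merged, and the merged result is serialized once. A minimal, compilable sketch of that contract, reduced to two counters — `Aggregate` here is a local stand-in with the same three methods as the trait used above:

use serde_json::json;

trait Aggregate {
    fn event_name(&self) -> &'static str;
    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self>;
    fn into_event(self: Box<Self>) -> serde_json::Value;
}

#[derive(Default)]
struct Counts {
    total_received: usize,
    total_succeeded: usize,
}

impl Aggregate for Counts {
    fn event_name(&self) -> &'static str {
        "Documents Searched by Multi-Search POST"
    }

    // Merging saturates instead of overflowing, mirroring the code above.
    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
        Box::new(Counts {
            total_received: self.total_received.saturating_add(new.total_received),
            total_succeeded: self.total_succeeded.saturating_add(new.total_succeeded),
        })
    }

    fn into_event(self: Box<Self>) -> serde_json::Value {
        json!({
            "total_received": self.total_received,
            "total_failed": self.total_received.saturating_sub(self.total_succeeded),
        })
    }
}

fn main() {
    let a = Box::new(Counts { total_received: 1, total_succeeded: 1 });
    let b = Box::new(Counts { total_received: 1, total_succeeded: 0 });
    let merged = a.aggregate(b);
    let name = merged.event_name();
    println!("{name}: {}", merged.into_event());
}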
@@ -3,6 +3,7 @@ use actix_web::{web, HttpRequest, HttpResponse};
 use index_scheduler::IndexScheduler;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::tasks::KindWithContent;
+use serde_json::json;
 use tracing::debug;

 use crate::analytics::Analytics;
@@ -16,15 +17,13 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot))));
 }

-crate::empty_analytics!(SnapshotAnalytics, "Snapshot Created");
-
 pub async fn create_snapshot(
     index_scheduler: GuardedData<ActionPolicy<{ actions::SNAPSHOTS_CREATE }>, Data<IndexScheduler>>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-    analytics.publish(SnapshotAnalytics::default(), &req);
+    analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req));

     let task = KindWithContent::SnapshotCreation;
     let uid = get_task_id(&req, &opt)?;
@@ -8,10 +8,10 @@ use meilisearch_types::error::deserr_codes::InvalidSwapIndexes;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::tasks::{IndexSwap, KindWithContent};
-use serde::Serialize;
+use serde_json::json;

 use super::{get_task_id, is_dry_run, SummarizedTaskView};
-use crate::analytics::{Aggregate, Analytics};
+use crate::analytics::Analytics;
 use crate::error::MeilisearchHttpError;
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::{AuthenticationError, GuardedData};
@@ -29,36 +29,21 @@ pub struct SwapIndexesPayload {
     indexes: Vec<IndexUid>,
 }

-#[derive(Serialize)]
-struct IndexSwappedAnalytics {
-    swap_operation_number: usize,
-}
-
-impl Aggregate for IndexSwappedAnalytics {
-    fn event_name(&self) -> &'static str {
-        "Indexes Swapped"
-    }
-
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        Box::new(Self {
-            swap_operation_number: self.swap_operation_number.max(new.swap_operation_number),
-        })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        serde_json::to_value(*self).unwrap_or_default()
-    }
-}
-
 pub async fn swap_indexes(
     index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
     params: AwebJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let params = params.into_inner();
-    analytics.publish(IndexSwappedAnalytics { swap_operation_number: params.len() }, &req);
+    analytics.publish(
+        "Indexes Swapped".to_string(),
+        json!({
+            "swap_operation_number": params.len(),
+        }),
+        Some(&req),
+    );
     let filters = index_scheduler.filters();

     let mut swaps = vec![];
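The hunk above is the pattern repeated throughout this branch: the v1.11.3 side publishes a typed event struct that knows its own name and serialization, the control-be side ships a loose name plus a `json!` payload. A compilable sketch of the two shapes, with local stand-in functions since the real `publish` signatures live in `crate::analytics`:

use serde_json::{json, Value};

struct TypedEvent {
    swap_operation_number: usize,
}

// v1.11.3 style: the struct itself carries the event name and payload.
fn publish_typed(event: TypedEvent) -> (&'static str, Value) {
    ("Indexes Swapped", json!({ "swap_operation_number": event.swap_operation_number }))
}

// control-be style: name and payload travel as loose values, unchecked by the compiler.
fn publish_json(name: String, payload: Value) -> (String, Value) {
    (name, payload)
}

fn main() {
    let (n1, p1) = publish_typed(TypedEvent { swap_operation_number: 2 });
    let (n2, p2) = publish_json("Indexes Swapped".to_string(), json!({ "swap_operation_number": 2 }));
    println!("{n1}: {p1}");
    println!("{n2}: {p2}");
}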
@@ -12,17 +12,18 @@ use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
 use meilisearch_types::task_view::TaskView;
 use meilisearch_types::tasks::{Kind, KindWithContent, Status};
 use serde::Serialize;
+use serde_json::json;
 use time::format_description::well_known::Rfc3339;
 use time::macros::format_description;
 use time::{Date, Duration, OffsetDateTime, Time};
 use tokio::task;

 use super::{get_task_id, is_dry_run, SummarizedTaskView};
-use crate::analytics::{Aggregate, AggregateMethod, Analytics};
+use crate::analytics::Analytics;
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
-use crate::{aggregate_methods, Opt};
+use crate::Opt;

 const DEFAULT_LIMIT: u32 = 20;

@@ -157,69 +158,12 @@ impl TaskDeletionOrCancelationQuery {
     }
 }

-aggregate_methods!(
-    CancelTasks => "Tasks Canceled",
-    DeleteTasks => "Tasks Deleted",
-);
-
-#[derive(Serialize)]
-struct TaskFilterAnalytics<Method: AggregateMethod> {
-    filtered_by_uid: bool,
-    filtered_by_index_uid: bool,
-    filtered_by_type: bool,
-    filtered_by_status: bool,
-    filtered_by_canceled_by: bool,
-    filtered_by_before_enqueued_at: bool,
-    filtered_by_after_enqueued_at: bool,
-    filtered_by_before_started_at: bool,
-    filtered_by_after_started_at: bool,
-    filtered_by_before_finished_at: bool,
-    filtered_by_after_finished_at: bool,
-
-    #[serde(skip)]
-    marker: std::marker::PhantomData<Method>,
-}
-
-impl<Method: AggregateMethod + 'static> Aggregate for TaskFilterAnalytics<Method> {
-    fn event_name(&self) -> &'static str {
-        Method::event_name()
-    }
-
-    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
-        Box::new(Self {
-            filtered_by_uid: self.filtered_by_uid | new.filtered_by_uid,
-            filtered_by_index_uid: self.filtered_by_index_uid | new.filtered_by_index_uid,
-            filtered_by_type: self.filtered_by_type | new.filtered_by_type,
-            filtered_by_status: self.filtered_by_status | new.filtered_by_status,
-            filtered_by_canceled_by: self.filtered_by_canceled_by | new.filtered_by_canceled_by,
-            filtered_by_before_enqueued_at: self.filtered_by_before_enqueued_at
-                | new.filtered_by_before_enqueued_at,
-            filtered_by_after_enqueued_at: self.filtered_by_after_enqueued_at
-                | new.filtered_by_after_enqueued_at,
-            filtered_by_before_started_at: self.filtered_by_before_started_at
-                | new.filtered_by_before_started_at,
-            filtered_by_after_started_at: self.filtered_by_after_started_at
-                | new.filtered_by_after_started_at,
-            filtered_by_before_finished_at: self.filtered_by_before_finished_at
-                | new.filtered_by_before_finished_at,
-            filtered_by_after_finished_at: self.filtered_by_after_finished_at
-                | new.filtered_by_after_finished_at,
-
-            marker: std::marker::PhantomData,
-        })
-    }
-
-    fn into_event(self: Box<Self>) -> serde_json::Value {
-        serde_json::to_value(*self).unwrap_or_default()
-    }
-}
-
 async fn cancel_tasks(
     index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>,
     params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let params = params.into_inner();

@@ -228,22 +172,21 @@ async fn cancel_tasks(
     }

     analytics.publish(
-        TaskFilterAnalytics::<CancelTasks> {
-            filtered_by_uid: params.uids.is_some(),
-            filtered_by_index_uid: params.index_uids.is_some(),
-            filtered_by_type: params.types.is_some(),
-            filtered_by_status: params.statuses.is_some(),
-            filtered_by_canceled_by: params.canceled_by.is_some(),
-            filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(),
-            filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(),
-            filtered_by_before_started_at: params.before_started_at.is_some(),
-            filtered_by_after_started_at: params.after_started_at.is_some(),
-            filtered_by_before_finished_at: params.before_finished_at.is_some(),
-            filtered_by_after_finished_at: params.after_finished_at.is_some(),
-
-            marker: std::marker::PhantomData,
-        },
-        &req,
+        "Tasks Canceled".to_string(),
+        json!({
+            "filtered_by_uid": params.uids.is_some(),
+            "filtered_by_index_uid": params.index_uids.is_some(),
+            "filtered_by_type": params.types.is_some(),
+            "filtered_by_status": params.statuses.is_some(),
+            "filtered_by_canceled_by": params.canceled_by.is_some(),
+            "filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(),
+            "filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(),
+            "filtered_by_before_started_at": params.before_started_at.is_some(),
+            "filtered_by_after_started_at": params.after_started_at.is_some(),
+            "filtered_by_before_finished_at": params.before_finished_at.is_some(),
+            "filtered_by_after_finished_at": params.after_finished_at.is_some(),
+        }),
+        Some(&req),
     );

     let query = params.into_query();
@@ -271,7 +214,7 @@ async fn delete_tasks(
     params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let params = params.into_inner();

@@ -280,24 +223,22 @@ async fn delete_tasks(
     }

     analytics.publish(
-        TaskFilterAnalytics::<DeleteTasks> {
-            filtered_by_uid: params.uids.is_some(),
-            filtered_by_index_uid: params.index_uids.is_some(),
-            filtered_by_type: params.types.is_some(),
-            filtered_by_status: params.statuses.is_some(),
-            filtered_by_canceled_by: params.canceled_by.is_some(),
-            filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(),
-            filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(),
-            filtered_by_before_started_at: params.before_started_at.is_some(),
-            filtered_by_after_started_at: params.after_started_at.is_some(),
-            filtered_by_before_finished_at: params.before_finished_at.is_some(),
-            filtered_by_after_finished_at: params.after_finished_at.is_some(),
-
-            marker: std::marker::PhantomData,
-        },
-        &req,
+        "Tasks Deleted".to_string(),
+        json!({
+            "filtered_by_uid": params.uids.is_some(),
+            "filtered_by_index_uid": params.index_uids.is_some(),
+            "filtered_by_type": params.types.is_some(),
+            "filtered_by_status": params.statuses.is_some(),
+            "filtered_by_canceled_by": params.canceled_by.is_some(),
+            "filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(),
+            "filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(),
+            "filtered_by_before_started_at": params.before_started_at.is_some(),
+            "filtered_by_after_started_at": params.after_started_at.is_some(),
+            "filtered_by_before_finished_at": params.before_finished_at.is_some(),
+            "filtered_by_after_finished_at": params.after_finished_at.is_some(),
+        }),
+        Some(&req),
     );

     let query = params.into_query();

     let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
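The removed `TaskFilterAnalytics<Method>` above reuses one struct for both the cancel and delete routes, selecting the event name at compile time through a zero-sized type parameter. A standalone sketch of that marker pattern — the trait and type names are local stand-ins for what `aggregate_methods!` presumably expands to:

use std::marker::PhantomData;

trait AggregateMethod {
    fn event_name() -> &'static str;
}

// Uninhabited enums: pure compile-time markers, never instantiated.
enum CancelTasks {}
enum DeleteTasks {}

impl AggregateMethod for CancelTasks {
    fn event_name() -> &'static str {
        "Tasks Canceled"
    }
}

impl AggregateMethod for DeleteTasks {
    fn event_name() -> &'static str {
        "Tasks Deleted"
    }
}

struct TaskFilterAnalytics<M: AggregateMethod> {
    filtered_by_uid: bool,
    // PhantomData is zero-sized: the marker costs nothing at runtime.
    marker: PhantomData<M>,
}

fn main() {
    let cancel = TaskFilterAnalytics::<CancelTasks> { filtered_by_uid: true, marker: PhantomData };
    assert!(cancel.filtered_by_uid);
    println!("{}", <CancelTasks as AggregateMethod>::event_name());
    println!("{}", <DeleteTasks as AggregateMethod>::event_name());
}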
@@ -267,54 +267,58 @@ impl fmt::Debug for SearchQuery {
 pub struct HybridQuery {
     #[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
     pub semantic_ratio: SemanticRatio,
-    #[deserr(error = DeserrJsonError<InvalidEmbedder>)]
-    pub embedder: String,
+    #[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
+    pub embedder: Option<String>,
 }

 #[derive(Clone)]
 pub enum SearchKind {
     KeywordOnly,
-    SemanticOnly { embedder_name: String, embedder: Arc<Embedder>, quantized: bool },
-    Hybrid { embedder_name: String, embedder: Arc<Embedder>, quantized: bool, semantic_ratio: f32 },
+    SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
+    Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
 }

 impl SearchKind {
     pub(crate) fn semantic(
         index_scheduler: &index_scheduler::IndexScheduler,
         index: &Index,
-        embedder_name: &str,
+        embedder_name: Option<&str>,
         vector_len: Option<usize>,
     ) -> Result<Self, ResponseError> {
-        let (embedder_name, embedder, quantized) =
+        let (embedder_name, embedder) =
             Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
-        Ok(Self::SemanticOnly { embedder_name, embedder, quantized })
+        Ok(Self::SemanticOnly { embedder_name, embedder })
     }

     pub(crate) fn hybrid(
         index_scheduler: &index_scheduler::IndexScheduler,
         index: &Index,
-        embedder_name: &str,
+        embedder_name: Option<&str>,
         semantic_ratio: f32,
         vector_len: Option<usize>,
     ) -> Result<Self, ResponseError> {
-        let (embedder_name, embedder, quantized) =
+        let (embedder_name, embedder) =
             Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
-        Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio })
+        Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio })
     }

     pub(crate) fn embedder(
         index_scheduler: &index_scheduler::IndexScheduler,
         index: &Index,
-        embedder_name: &str,
+        embedder_name: Option<&str>,
         vector_len: Option<usize>,
-    ) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
+    ) -> Result<(String, Arc<Embedder>), ResponseError> {
         let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
         let embedders = index_scheduler.embedders(embedder_configs)?;

-        let (embedder, _, quantized) = embedders
-            .get(embedder_name)
+        let embedder_name = embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name());
+
+        let embedder = embedders.get(embedder_name);
+
+        let embedder = embedder
             .ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned()))
-            .map_err(milli::Error::from)?;
+            .map_err(milli::Error::from)?
+            .0;

         if let Some(vector_len) = vector_len {
             if vector_len != embedder.dimensions() {
@@ -328,7 +332,7 @@ impl SearchKind {
             }
         }

-        Ok((embedder_name.to_owned(), embedder, quantized))
+        Ok((embedder_name.to_owned(), embedder))
     }
 }

@@ -534,8 +538,8 @@ pub struct SimilarQuery {
     pub limit: usize,
     #[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)]
     pub filter: Option<Value>,
-    #[deserr(error = DeserrJsonError<InvalidEmbedder>)]
-    pub embedder: String,
+    #[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
+    pub embedder: Option<String>,
     #[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
     pub attributes_to_retrieve: Option<BTreeSet<String>>,
     #[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
@@ -789,35 +793,28 @@ fn prepare_search<'t>(
                 search.query(q);
             }
         }
-        SearchKind::SemanticOnly { embedder_name, embedder, quantized } => {
+        SearchKind::SemanticOnly { embedder_name, embedder } => {
             let vector = match query.vector.clone() {
                 Some(vector) => vector,
                 None => {
                     let span = tracing::trace_span!(target: "search::vector", "embed_one");
                     let _entered = span.enter();

-                    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
-
                     embedder
-                        .embed_one(query.q.clone().unwrap(), Some(deadline))
+                        .embed_one(query.q.clone().unwrap())
                         .map_err(milli::vector::Error::from)
                         .map_err(milli::Error::from)?
                 }
             };

-            search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector));
+            search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
         }
-        SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => {
+        SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => {
             if let Some(q) = &query.q {
                 search.query(q);
             }
             // will be embedded in hybrid search if necessary
-            search.semantic(
-                embedder_name.clone(),
-                embedder.clone(),
-                *quantized,
-                query.vector.clone(),
-            );
+            search.semantic(embedder_name.clone(), embedder.clone(), query.vector.clone());
         }
     }

@@ -1197,13 +1194,8 @@ impl<'a> HitMaker<'a> {
         let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
             // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
             (None, _) => false,
-            // vectors has no fid, so check its explicit name
-            (Some(_), None) => {
-                // unwrap as otherwise we'd go to the first one
-                let displayed_names = index.displayed_fields(rtxn)?.unwrap();
-                !displayed_names
-                    .contains(&milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME)
-            }
+            // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
+            (Some(_), None) => true,
             // displayed_ids is a finit list, so hide if `_vectors` is not part of it
             (Some(map), Some(vectors_fid)) => map.contains(&vectors_fid),
         };
@@ -1451,7 +1443,6 @@ pub fn perform_similar(
     query: SimilarQuery,
     embedder_name: String,
     embedder: Arc<Embedder>,
-    quantized: bool,
     retrieve_vectors: RetrieveVectors,
     features: RoFeatures,
 ) -> Result<SimilarResult, ResponseError> {
@@ -1480,16 +1471,8 @@ pub fn perform_similar(
         ));
     };

-    let mut similar = milli::Similar::new(
-        internal_id,
-        offset,
-        limit,
-        index,
-        &rtxn,
-        embedder_name,
-        embedder,
-        quantized,
-    );
+    let mut similar =
+        milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);

     if let Some(ref filter) = query.filter {
         if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
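The `embedder_name: Option<&str>` change above is the behavioral core of this branch: the control-be side falls back to a default embedder name when the request omits one, while v1.11.3 requires it. A self-contained sketch of that fallback logic — `EmbedderRegistry` is a hypothetical stand-in for the index's embedder map, and the default-selection rule is an assumption, not the project's exact behavior:

use std::collections::HashMap;

struct EmbedderRegistry {
    // The value type is irrelevant to the resolution logic being shown.
    embedders: HashMap<String, Vec<f32>>,
}

impl EmbedderRegistry {
    fn get_default_embedder_name(&self) -> &str {
        // Assumption: "default" wins if configured, else fall back to any name.
        if self.embedders.contains_key("default") {
            "default"
        } else {
            self.embedders.keys().next().map(String::as_str).unwrap_or("default")
        }
    }

    // Mirrors `embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name())`
    // in the hunk above.
    fn resolve<'a>(&'a self, requested: Option<&'a str>) -> &'a str {
        requested.unwrap_or_else(|| self.get_default_embedder_name())
    }
}

fn main() {
    let mut embedders = HashMap::new();
    embedders.insert("default".to_string(), vec![0.0, 0.0]);
    let registry = EmbedderRegistry { embedders };
    assert_eq!(registry.resolve(None), "default");
    assert_eq!(registry.resolve(Some("custom")), "custom");
}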
@@ -381,6 +381,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
         db_path: dir.as_ref().join("db"),
         dump_dir: dir.as_ref().join("dumps"),
         env: "development".to_owned(),
+        #[cfg(feature = "analytics")]
         no_analytics: true,
         max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(),
         max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(),
@@ -9,9 +9,8 @@ use actix_web::test;
 use actix_web::test::TestRequest;
 use actix_web::web::Data;
 use index_scheduler::IndexScheduler;
-use meilisearch::analytics::Analytics;
 use meilisearch::search_queue::SearchQueue;
-use meilisearch::{create_app, Opt, SubscriberForSecondLayer};
+use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
 use meilisearch_auth::AuthController;
 use tracing::level_filters::LevelFilter;
 use tracing_subscriber::Layer;
@@ -142,7 +141,7 @@ impl Service {
             Data::new(search_queue),
             self.options.clone(),
             (route_layer_handle, stderr_layer_handle),
-            Data::new(Analytics::no_analytics()),
+            analytics::MockAnalytics::new(&self.options),
             true,
         ))
         .await
@@ -7,9 +7,8 @@ use std::str::FromStr;
 use actix_web::http::header::ContentType;
 use actix_web::web::Data;
 use meili_snap::snapshot;
-use meilisearch::analytics::Analytics;
 use meilisearch::search_queue::SearchQueue;
-use meilisearch::{create_app, Opt, SubscriberForSecondLayer};
+use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
 use tracing::level_filters::LevelFilter;
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::Layer;
@@ -55,7 +54,7 @@ async fn basic_test_log_stream_route() {
             Data::new(search_queue),
             server.service.options.clone(),
             (route_layer_handle, stderr_layer_handle),
-            Data::new(Analytics::no_analytics()),
+            analytics::MockAnalytics::new(&server.service.options),
             true,
         ))
         .await;
@@ -128,7 +128,7 @@ async fn simple_search() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -137,7 +137,7 @@ async fn simple_search() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -146,7 +146,7 @@ async fn simple_search() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -161,7 +161,7 @@ async fn limit_offset() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
        )
        .await;
     snapshot!(code, @"200 OK");
@@ -174,7 +174,7 @@ async fn limit_offset() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -188,11 +188,8 @@ async fn simple_search_hf() {
     let server = Server::new().await;
     let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

-    let (response, code) = index
-        .search_post(
-            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}),
-        )
-        .await;
+    let (response, code) =
+        index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await;
     snapshot!(code, @"200 OK");
     snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
     snapshot!(response["semanticHitCount"], @"0");
@@ -200,7 +197,7 @@ async fn simple_search_hf() {
     let (response, code) = index
         .search_post(
             // disable ranking score as the vectors between architectures are not equal
-            json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}),
+            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -209,7 +206,7 @@ async fn simple_search_hf() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}),
+            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -218,7 +215,7 @@ async fn simple_search_hf() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
+            json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -227,7 +224,7 @@ async fn simple_search_hf() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
+            json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -240,7 +237,7 @@ async fn distribution_shift() {
     let server = Server::new().await;
     let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;

-    let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true});
+    let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true});
     let (response, code) = index.search_post(search.clone()).await;
     snapshot!(code, @"200 OK");
     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
@@ -274,7 +271,7 @@ async fn highlighter() {

     let (response, code) = index
         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
-        "hybrid": {"embedder": "default", "semanticRatio": 0.2},
+        "hybrid": {"semanticRatio": 0.2},
         "retrieveVectors": true,
         "attributesToHighlight": [
             "desc",
@@ -290,7 +287,7 @@ async fn highlighter() {

     let (response, code) = index
         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
-        "hybrid": {"embedder": "default", "semanticRatio": 0.8},
+        "hybrid": {"semanticRatio": 0.8},
         "retrieveVectors": true,
         "showRankingScore": true,
         "attributesToHighlight": [
@@ -307,7 +304,7 @@ async fn highlighter() {
     // no highlighting on full semantic
     let (response, code) = index
         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
-        "hybrid": {"embedder": "default", "semanticRatio": 1.0},
+        "hybrid": {"semanticRatio": 1.0},
         "retrieveVectors": true,
         "showRankingScore": true,
         "attributesToHighlight": [
@@ -329,7 +326,7 @@ async fn invalid_semantic_ratio() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}),
         )
         .await;
     snapshot!(code, @"400 Bad Request");
@@ -344,7 +341,7 @@ async fn invalid_semantic_ratio() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}),
         )
         .await;
     snapshot!(code, @"400 Bad Request");
@@ -360,7 +357,7 @@ async fn invalid_semantic_ratio() {
     let (response, code) = index
         .search_get(
             &yaup::to_string(
-                &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}),
+                &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}),
             )
             .unwrap(),
         )
@@ -378,7 +375,7 @@ async fn invalid_semantic_ratio() {
     let (response, code) = index
         .search_get(
             &yaup::to_string(
-                &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": -0.2}),
+                &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}),
             )
             .unwrap(),
         )
@@ -401,7 +398,7 @@ async fn single_document() {

     let (response, code) = index
         .search_post(
-            json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
+            json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;

@@ -417,7 +414,7 @@ async fn query_combination() {

     // search without query and vector, but with hybrid => still placeholder
     let (response, code) = index
-        .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;

     snapshot!(code, @"200 OK");
@@ -426,7 +423,7 @@ async fn query_combination() {

     // same with a different semantic ratio
     let (response, code) = index
-        .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
         .await;

     snapshot!(code, @"200 OK");
@@ -435,7 +432,7 @@ async fn query_combination() {

     // wrong vector dimensions
     let (response, code) = index
-        .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;

     snapshot!(code, @"400 Bad Request");
@@ -450,7 +447,7 @@ async fn query_combination() {

     // full vector
     let (response, code) = index
-        .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;

     snapshot!(code, @"200 OK");
@@ -459,7 +456,7 @@ async fn query_combination() {

     // full keyword, without a query
     let (response, code) = index
-        .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;

     snapshot!(code, @"200 OK");
@@ -468,7 +465,7 @@ async fn query_combination() {

     // query + vector, full keyword => keyword
     let (response, code) = index
-        .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;

     snapshot!(code, @"200 OK");
@@ -483,7 +480,7 @@ async fn query_combination() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
-      "message": "Invalid request: missing `hybrid` parameter when `vector` is present.",
+      "message": "Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.",
       "code": "missing_search_hybrid",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#missing_search_hybrid"
@@ -493,7 +490,7 @@ async fn query_combination() {
     // full vector, without a vector => error
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
+            json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;

@@ -510,7 +507,7 @@ async fn query_combination() {
     // hybrid without a vector => full keyword
     let (response, code) = index
         .search_post(
-            json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
+            json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;

@@ -526,58 +523,7 @@ async fn retrieve_vectors() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
+            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
-        )
-        .await;
-    snapshot!(code, @"200 OK");
-    insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
-    [
-      {
-        "title": "Captain Planet",
-        "desc": "He's not part of the Marvel Cinematic Universe",
-        "id": "2",
-        "_vectors": {
-          "default": {
-            "embeddings": "[vectors]",
-            "regenerate": true
-          }
-        }
-      },
-      {
-        "title": "Captain Marvel",
-        "desc": "a Shazam ersatz",
-        "id": "3",
-        "_vectors": {
-          "default": {
-            "embeddings": "[vectors]",
-            "regenerate": true
-          }
-        }
-      },
-      {
-        "title": "Shazam!",
-        "desc": "a Captain Marvel ersatz",
-        "id": "1",
-        "_vectors": {
-          "default": {
-            "embeddings": "[vectors]",
-            "regenerate": true
-          }
-        }
-      }
-    ]
-    "###);
-
-    // use explicit `_vectors` in displayed attributes
-    let (response, code) = index
-        .update_settings(json!({ "displayedAttributes": ["id", "title", "desc", "_vectors"]} ))
-        .await;
-    assert_eq!(202, code, "{:?}", response);
-    index.wait_task(response.uid()).await;
-
-    let (response, code) = index
-        .search_post(
-            json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -627,7 +573,7 @@ async fn retrieve_vectors() {

     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
+            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
         )
         .await;
     snapshot!(code, @"200 OK");
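Every test hunk above encodes the same request-shape difference: on the v1.11.3 side a hybrid query must name its embedder, on the control-be side the field may be omitted and a default is resolved server-side. A plain serde_json sketch of the two payloads, with no Meilisearch client involved:

use serde_json::json;

fn main() {
    // v1.11.3 side: `embedder` is required inside `hybrid`.
    let new_style = json!({
        "q": "Captain",
        "hybrid": { "embedder": "default", "semanticRatio": 0.2 }
    });
    // control-be side: `embedder` may be omitted and defaults server-side.
    let old_style = json!({
        "q": "Captain",
        "hybrid": { "semanticRatio": 0.2 }
    });
    println!("{new_style}\n{old_style}");
}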
@@ -922,7 +922,7 @@ async fn invalid_locales() {
|
|||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
"message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`",
|
"message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`",
|
||||||
"code": "invalid_search_locales",
|
"code": "invalid_search_locales",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_search_locales"
|
"link": "https://docs.meilisearch.com/errors#invalid_search_locales"
|
||||||
@@ -935,7 +935,7 @@ async fn invalid_locales() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bg, bn, ca, cs, da, de, el, en, eo, es, et, fa, fi, fr, gu, he, hi, hr, hu, hy, id, it, ja, jv, ka, km, kn, ko, la, lt, lv, mk, ml, mr, my, nb, ne, nl, or, pa, pl, pt, ro, ru, si, sk, sl, sn, sr, sv, ta, te, th, tk, tl, tr, uk, ur, uz, vi, yi, zh, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zho, zul",
+      "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bn, bg, ca, cs, zh, da, de, el, en, eo, et, fi, fr, gu, he, hi, hr, hu, hy, id, it, jv, ja, kn, ka, km, ko, la, lv, lt, ml, mr, mk, my, ne, nl, nb, or, pa, fa, pl, pt, ro, ru, si, sk, sl, sn, es, sr, sv, ta, te, tl, th, tk, tr, uk, ur, uz, vi, yi, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zul",
       "code": "invalid_search_locales",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_locales"
@@ -957,7 +957,7 @@ async fn invalid_localized_attributes_rules() {
         .await;
     snapshot!(response, @r###"
     {
-      "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`",
+      "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`",
       "code": "invalid_settings_localized_attributes",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes"
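Note: the three hunks above differ only in the supported-locale list baked into each error message: the v1.11.3 side appends `cmn` after `zho`, while the other side sorts `zh` and `cmn` into the main sequence and drops `zho`. A sketch of a request that avoids the error, assuming only `serde_json` (any code from the lists shown in the snapshots would do):

```rust
use serde_json::json;

fn main() {
    // Pinning the locale to a code both sides accept, e.g. `jpn`,
    // sidesteps the `invalid_search_locales` error snapshotted above.
    let request = json!({
        "q": "進撃の巨人",
        "locales": ["jpn"]
    });
    println!("{request}");
}
```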
@@ -1143,195 +1143,3 @@ async fn facet_search_with_localized_attributes() {
     }
     "###);
 }
-
-#[actix_rt::test]
-async fn swedish_search() {
-    let server = Server::new().await;
-
-    let index = server.index("test");
-    let documents = json!([
-        {"id": "tra1-1", "product": "trä"},
-        {"id": "tra2-1", "product": "traktor"},
-        {"id": "tra1-2", "product": "träbjälke"},
-        {"id": "tra2-2", "product": "trafiksignal"},
-    ]);
-    index.add_documents(documents, None).await;
-    let (_response, _) = index
-        .update_settings(json!({
-            "searchableAttributes": ["product"],
-            "localizedAttributes": [
-                // force swedish
-                {"attributePatterns": ["product"], "locales": ["swe"]}
-            ]
-        }))
-        .await;
-    index.wait_task(1).await;
-
-    // infer swedish
-    index
-        .search(json!({"q": "trä", "attributesToRetrieve": ["product"]}), |response, code| {
-            snapshot!(response, @r###"
-            {
-              "hits": [
-                {
-                  "product": "trä"
-                },
-                {
-                  "product": "träbjälke"
-                }
-              ],
-              "query": "trä",
-              "processingTimeMs": "[duration]",
-              "limit": 20,
-              "offset": 0,
-              "estimatedTotalHits": 2
-            }
-            "###);
-            snapshot!(code, @"200 OK");
-        })
-        .await;
-
-    index
-        .search(json!({"q": "tra", "attributesToRetrieve": ["product"]}), |response, code| {
-            snapshot!(response, @r###"
-            {
-              "hits": [
-                {
-                  "product": "traktor"
-                },
-                {
-                  "product": "trafiksignal"
-                }
-              ],
-              "query": "tra",
-              "processingTimeMs": "[duration]",
-              "limit": 20,
-              "offset": 0,
-              "estimatedTotalHits": 2
-            }
-            "###);
-            snapshot!(code, @"200 OK");
-        })
-        .await;
-
-    // force swedish
-    index
-        .search(
-            json!({"q": "trä", "locales": ["swe"], "attributesToRetrieve": ["product"]}),
-            |response, code| {
-                snapshot!(response, @r###"
-                {
-                  "hits": [
-                    {
-                      "product": "trä"
-                    },
-                    {
-                      "product": "träbjälke"
-                    }
-                  ],
-                  "query": "trä",
-                  "processingTimeMs": "[duration]",
-                  "limit": 20,
-                  "offset": 0,
-                  "estimatedTotalHits": 2
-                }
-                "###);
-                snapshot!(code, @"200 OK");
-            },
-        )
-        .await;
-    index
-        .search(
-            json!({"q": "tra", "locales": ["swe"], "attributesToRetrieve": ["product"]}),
-            |response, code| {
-                snapshot!(response, @r###"
-                {
-                  "hits": [
-                    {
-                      "product": "traktor"
-                    },
-                    {
-                      "product": "trafiksignal"
-                    }
-                  ],
-                  "query": "tra",
-                  "processingTimeMs": "[duration]",
-                  "limit": 20,
-                  "offset": 0,
-                  "estimatedTotalHits": 2
-                }
-                "###);
-                snapshot!(code, @"200 OK");
-            },
-        )
-        .await;
-}
-
-#[actix_rt::test]
-async fn german_search() {
-    let server = Server::new().await;
-
-    let index = server.index("test");
-    let documents = json!([
-        {"id": 1, "product": "Interkulturalität"},
-        {"id": 2, "product": "Wissensorganisation"},
-    ]);
-    index.add_documents(documents, None).await;
-    let (_response, _) = index
-        .update_settings(json!({
-            "searchableAttributes": ["product"],
-            "localizedAttributes": [
-                // force swedish
-                {"attributePatterns": ["product"], "locales": ["deu"]}
-            ]
-        }))
-        .await;
-    index.wait_task(1).await;
-
-    // infer swedish
-    index
-        .search(
-            json!({"q": "kulturalität", "attributesToRetrieve": ["product"]}),
-            |response, code| {
-                snapshot!(response, @r###"
-                {
-                  "hits": [
-                    {
-                      "product": "Interkulturalität"
-                    }
-                  ],
-                  "query": "kulturalität",
-                  "processingTimeMs": "[duration]",
-                  "limit": 20,
-                  "offset": 0,
-                  "estimatedTotalHits": 1
-                }
-                "###);
-                snapshot!(code, @"200 OK");
-            },
-        )
-        .await;
-
-    index
-        .search(
-            json!({"q": "organisation", "attributesToRetrieve": ["product"]}),
-            |response, code| {
-                snapshot!(response, @r###"
-                {
-                  "hits": [
-                    {
-                      "product": "Wissensorganisation"
-                    }
-                  ],
-                  "query": "organisation",
-                  "processingTimeMs": "[duration]",
-                  "limit": 20,
-                  "offset": 0,
-                  "estimatedTotalHits": 1
-                }
-                "###);
-                snapshot!(code, @"200 OK");
-            },
-        )
-        .await;
-}
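Note: the removed `swedish_search` and `german_search` tests both drive the same setting; only the locale and documents change. The settings payload they rely on, as a standalone sketch (only `serde_json` assumed):

```rust
use serde_json::json;

fn main() {
    // Pin an attribute to a locale so the tokenizer stops inferring the
    // language per document (here Swedish, `swe`; the German twin uses `deu`).
    let settings = json!({
        "searchableAttributes": ["product"],
        "localizedAttributes": [
            { "attributePatterns": ["product"], "locales": ["swe"] }
        ]
    });
    println!("{settings}");
}
```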
@@ -1099,17 +1099,11 @@ async fn experimental_feature_vector_store() {
     index.add_documents(json!(documents), None).await;
     index.wait_task(0).await;

-    let (response, code) = index
-        .search_post(json!({
+    index
+        .search(json!({
             "vector": [1.0, 2.0, 3.0],
-            "hybrid": {
-                "embedder": "manual",
-            },
             "showRankingScore": true
-        }))
-        .await;
-
-    {
+        }), |response, code|{
             meili_snap::snapshot!(code, @"400 Bad Request");
             meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
             {
@@ -1119,8 +1113,8 @@ async fn experimental_feature_vector_store() {
               "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
             }
             "###);
-    }
-
+        })
+        .await;
     index
         .search(json!({
             "retrieveVectors": true,
@@ -1168,9 +1162,6 @@ async fn experimental_feature_vector_store() {
     let (response, code) = index
         .search_post(json!({
             "vector": [1.0, 2.0, 3.0],
-            "hybrid": {
-                "embedder": "manual",
-            },
             "showRankingScore": true,
             "retrieveVectors": true,
         }))
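Note: the common thread of the hunks above is the `hybrid.embedder` field: the v1.11.3 side names the embedder in every vector search, the other side omits it and relies on a default. Both payload shapes side by side, assuming only `serde_json`:

```rust
use serde_json::json;

fn main() {
    // v1.11.3 side: the embedder is always spelled out.
    let explicit = json!({
        "vector": [1.0, 2.0, 3.0],
        "hybrid": { "embedder": "manual" },
        "showRankingScore": true
    });
    // Other side: no `hybrid` object; the single configured embedder is implied.
    let implicit = json!({
        "vector": [1.0, 2.0, 3.0],
        "showRankingScore": true
    });
    println!("{explicit}\n{implicit}");
}
```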
@@ -18,7 +18,7 @@ async fn similar_unexisting_index() {
     });

     index
-        .similar(json!({"id": 287947, "embedder": "manual"}), |response, code| {
+        .similar(json!({"id": 287947}), |response, code| {
             assert_eq!(code, 404);
             assert_eq!(response, expected_response);
         })
@@ -44,7 +44,7 @@ async fn similar_feature_not_enabled() {
     let server = Server::new().await;
     let index = server.index("test");

-    let (response, code) = index.similar_post(json!({"id": 287947, "embedder": "manual"})).await;
+    let (response, code) = index.similar_post(json!({"id": 287947})).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
@@ -199,8 +199,7 @@ async fn similar_not_found_id() {
     snapshot!(code, @"202 Accepted");
     server.wait_task(response.uid()).await;

-    let (response, code) =
-        index.similar_post(json!({"id": "definitely-doesnt-exist", "embedder": "manual"})).await;
+    let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
@@ -231,8 +230,7 @@ async fn similar_bad_offset() {
     snapshot!(code, @"202 Accepted");
     server.wait_task(response.uid()).await;

-    let (response, code) =
-        index.similar_post(json!({"id": 287947, "offset": "doggo", "embedder": "manual"})).await;
+    let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
@@ -243,7 +241,7 @@ async fn similar_bad_offset() {
     }
     "###);

-    let (response, code) = index.similar_get("?id=287947&offset=doggo&embedder=manual").await;
+    let (response, code) = index.similar_get("?id=287947&offset=doggo").await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
@@ -274,8 +272,7 @@ async fn similar_bad_limit() {
     snapshot!(code, @"202 Accepted");
     server.wait_task(response.uid()).await;

-    let (response, code) =
-        index.similar_post(json!({"id": 287947, "limit": "doggo", "embedder": "manual"})).await;
+    let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
@@ -286,7 +283,7 @@ async fn similar_bad_limit() {
     }
     "###);

-    let (response, code) = index.similar_get("?id=287946&limit=doggo&embedder=manual").await;
+    let (response, code) = index.similar_get("?id=287946&limit=doggo").await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
@@ -326,8 +323,7 @@ async fn similar_bad_filter() {
     snapshot!(code, @"202 Accepted");
     index.wait_task(value.uid()).await;

-    let (response, code) =
-        index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await;
+    let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
@@ -365,7 +361,7 @@ async fn filter_invalid_syntax_object() {
     index.wait_task(value.uid()).await;

     index
-        .similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
+        .similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| {
             snapshot!(response, @r###"
             {
               "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
@@ -404,7 +400,7 @@ async fn filter_invalid_syntax_array() {
     index.wait_task(value.uid()).await;

     index
-        .similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
+        .similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| {
             snapshot!(response, @r###"
             {
               "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
@@ -450,7 +446,7 @@ async fn filter_invalid_syntax_string() {
     });
     index
         .similar(
-            json!({"id": 287947, "filter": "title = Glass XOR title = Glass", "embedder": "manual"}),
+            json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}),
             |response, code| {
                 assert_eq!(response, expected_response);
                 assert_eq!(code, 400);
@@ -490,13 +486,10 @@ async fn filter_invalid_attribute_array() {
       "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(
-            json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| {
             assert_eq!(response, expected_response);
             assert_eq!(code, 400);
-            },
-        )
+        })
         .await;
 }

@@ -531,13 +524,10 @@ async fn filter_invalid_attribute_string() {
       "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(
-            json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| {
             assert_eq!(response, expected_response);
             assert_eq!(code, 400);
-            },
-        )
+        })
         .await;
 }

@@ -572,13 +562,10 @@ async fn filter_reserved_geo_attribute_array() {
       "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(
-            json!({"id": 287947, "filter": ["_geo = Glass"], "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| {
             assert_eq!(response, expected_response);
             assert_eq!(code, 400);
-            },
-        )
+        })
         .await;
 }

@@ -613,13 +600,10 @@ async fn filter_reserved_geo_attribute_string() {
       "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(
-            json!({"id": 287947, "filter": "_geo = Glass", "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| {
             assert_eq!(response, expected_response);
             assert_eq!(code, 400);
-            },
-        )
+        })
         .await;
 }

@@ -654,13 +638,10 @@ async fn filter_reserved_attribute_array() {
       "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(
-            json!({"id": 287947, "filter": ["_geoDistance = Glass"], "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| {
             assert_eq!(response, expected_response);
             assert_eq!(code, 400);
-            },
-        )
+        })
         .await;
 }

@@ -695,13 +676,10 @@ async fn filter_reserved_attribute_string() {
       "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(
-            json!({"id": 287947, "filter": "_geoDistance = Glass", "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| {
             assert_eq!(response, expected_response);
             assert_eq!(code, 400);
-            },
-        )
+        })
         .await;
 }

@@ -736,13 +714,10 @@ async fn filter_reserved_geo_point_array() {
       "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(
-            json!({"id": 287947, "filter": ["_geoPoint = Glass"], "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| {
             assert_eq!(response, expected_response);
             assert_eq!(code, 400);
-            },
-        )
+        })
         .await;
 }

@@ -777,13 +752,10 @@ async fn filter_reserved_geo_point_string() {
       "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(
-            json!({"id": 287947, "filter": "_geoPoint = Glass", "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| {
             assert_eq!(response, expected_response);
             assert_eq!(code, 400);
-            },
-        )
+        })
         .await;
 }
@@ -793,8 +765,7 @@ async fn similar_bad_retrieve_vectors() {
     server.set_features(json!({"vectorStore": true})).await;
     let index = server.index("test");

-    let (response, code) =
-        index.similar_post(json!({"retrieveVectors": "doggo", "embedder": "manual"})).await;
+    let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
@@ -805,8 +776,7 @@ async fn similar_bad_retrieve_vectors() {
     }
     "###);

-    let (response, code) =
-        index.similar_post(json!({"retrieveVectors": [true], "embedder": "manual"})).await;
+    let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
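Note: every `/similar` hunk above follows the same recipe: the v1.11.3 side adds `"embedder": "manual"` to the request body (or `&embedder=manual` to the query string), the other side leaves it out. The minimal body difference, sketched with only `serde_json`:

```rust
use serde_json::json;

fn main() {
    // v1.11.3 side of the diff: the embedder is part of every similar request.
    let with_embedder = json!({ "id": 287947, "embedder": "manual" });
    // Other side: the engine falls back to its default embedder.
    let without_embedder = json!({ "id": 287947 });
    println!("{with_embedder}\n{without_embedder}");
}
```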
@@ -80,9 +80,7 @@ async fn basic() {
     index.wait_task(value.uid()).await;

     index
-        .similar(
-            json!({"id": 143, "retrieveVectors": true, "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": 143, "retrieveVectors": true}), |response, code| {
             snapshot!(code, @"200 OK");
             snapshot!(json_string!(response["hits"]), @r###"
             [
@@ -156,14 +154,11 @@ async fn basic() {
               }
             ]
             "###);
-            },
-        )
+        })
         .await;

     index
-        .similar(
-            json!({"id": "299537", "retrieveVectors": true, "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| {
             snapshot!(code, @"200 OK");
             snapshot!(json_string!(response["hits"]), @r###"
             [
@@ -237,8 +232,7 @@ async fn basic() {
               }
             ]
             "###);
-            },
-        )
+        })
         .await;
 }
@@ -278,7 +272,7 @@ async fn ranking_score_threshold() {

     index
         .similar(
-            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true, "embedder": "manual"}),
+            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
@@ -364,7 +358,7 @@ async fn ranking_score_threshold() {

     index
         .similar(
-            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true, "embedder": "manual"}),
+            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
@@ -432,7 +426,7 @@ async fn ranking_score_threshold() {

     index
         .similar(
-            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true, "embedder": "manual"}),
+            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
@@ -482,7 +476,7 @@ async fn ranking_score_threshold() {

     index
         .similar(
-            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true, "embedder": "manual"}),
+            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
@@ -514,7 +508,7 @@ async fn ranking_score_threshold() {

     index
         .similar(
-            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true, "embedder": "manual"}),
+            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 snapshot!(json_string!(response["hits"]), @"[]");
@@ -559,7 +553,7 @@ async fn filter() {

     index
         .similar(
-            json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true, "embedder": "manual"}),
+            json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 snapshot!(json_string!(response["hits"]), @r###"
@@ -623,7 +617,7 @@ async fn filter() {

     index
         .similar(
-            json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true, "embedder": "manual"}),
+            json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 snapshot!(json_string!(response["hits"]), @r###"
@@ -687,9 +681,7 @@ async fn limit_and_offset() {
     index.wait_task(value.uid()).await;

     index
-        .similar(
-            json!({"id": 143, "limit": 1, "retrieveVectors": true, "embedder": "manual"}),
-            |response, code| {
+        .similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| {
             snapshot!(code, @"200 OK");
             snapshot!(json_string!(response["hits"]), @r###"
             [
@@ -712,13 +704,12 @@ async fn limit_and_offset() {
               }
             ]
             "###);
-            },
-        )
+        })
         .await;

     index
         .similar(
-            json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true, "embedder": "manual"}),
+            json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 snapshot!(json_string!(response["hits"]), @r###"
@@ -1,380 +0,0 @@
-use meili_snap::{json_string, snapshot};
-
-use crate::common::{GetAllDocumentsOptions, Server};
-use crate::json;
-use crate::vector::generate_default_user_provided_documents;
-
-#[actix_rt::test]
-async fn retrieve_binary_quantize_status_in_the_settings() {
-    let server = Server::new().await;
-    let index = server.index("doggo");
-    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
-    snapshot!(code, @"200 OK");
-    snapshot!(value, @r###"
-    {
-      "vectorStore": true,
-      "metrics": false,
-      "logsRoute": false,
-      "editDocumentsByFunction": false,
-      "containsFilter": false
-    }
-    "###);
-
-    let (response, code) = index
-        .update_settings(json!({
-            "embedders": {
-                "manual": {
-                    "source": "userProvided",
-                    "dimensions": 3,
-                }
-            },
-        }))
-        .await;
-    snapshot!(code, @"202 Accepted");
-    server.wait_task(response.uid()).await.succeeded();
-
-    let (settings, code) = index.settings().await;
-    snapshot!(code, @"200 OK");
-    snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###);
-
-    let (response, code) = index
-        .update_settings(json!({
-            "embedders": {
-                "manual": {
-                    "source": "userProvided",
-                    "dimensions": 3,
-                    "binaryQuantized": false,
-                }
-            },
-        }))
-        .await;
-    snapshot!(code, @"202 Accepted");
-    server.wait_task(response.uid()).await.succeeded();
-
-    let (settings, code) = index.settings().await;
-    snapshot!(code, @"200 OK");
-    snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###);
-
-    let (response, code) = index
-        .update_settings(json!({
-            "embedders": {
-                "manual": {
-                    "source": "userProvided",
-                    "dimensions": 3,
-                    "binaryQuantized": true,
-                }
-            },
-        }))
-        .await;
-    snapshot!(code, @"202 Accepted");
-    server.wait_task(response.uid()).await.succeeded();
-
-    let (settings, code) = index.settings().await;
-    snapshot!(code, @"200 OK");
-    snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###);
-}
-
-#[actix_rt::test]
-async fn binary_quantize_before_sending_documents() {
-    let server = Server::new().await;
-    let index = server.index("doggo");
-    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
-    snapshot!(code, @"200 OK");
-    snapshot!(value, @r###"
-    {
-      "vectorStore": true,
-      "metrics": false,
-      "logsRoute": false,
-      "editDocumentsByFunction": false,
-      "containsFilter": false
-    }
-    "###);
-
-    let (response, code) = index
-        .update_settings(json!({
-            "embedders": {
-                "manual": {
-                    "source": "userProvided",
-                    "dimensions": 3,
-                    "binaryQuantized": true,
-                }
-            },
-        }))
-        .await;
-    snapshot!(code, @"202 Accepted");
-    server.wait_task(response.uid()).await.succeeded();
-
-    let documents = json!([
-        {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
-        {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
-    ]);
-    let (value, code) = index.add_documents(documents, None).await;
-    snapshot!(code, @"202 Accepted");
-    index.wait_task(value.uid()).await.succeeded();
-
-    // Make sure the documents are binary quantized
-    let (documents, _code) = index
-        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
-        .await;
-    snapshot!(json_string!(documents), @r###"
-    {
-      "results": [
-        {
-          "id": 0,
-          "name": "kefir",
-          "_vectors": {
-            "manual": {
-              "embeddings": [
-                [
-                  -1.0,
-                  -1.0,
-                  1.0
-                ]
-              ],
-              "regenerate": false
-            }
-          }
-        },
-        {
-          "id": 1,
-          "name": "echo",
-          "_vectors": {
-            "manual": {
-              "embeddings": [
-                [
-                  1.0,
-                  1.0,
-                  -1.0
-                ]
-              ],
-              "regenerate": false
-            }
-          }
-        }
-      ],
-      "offset": 0,
-      "limit": 20,
-      "total": 2
-    }
-    "###);
-}
-
-#[actix_rt::test]
-async fn binary_quantize_after_sending_documents() {
-    let server = Server::new().await;
-    let index = server.index("doggo");
-    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
-    snapshot!(code, @"200 OK");
-    snapshot!(value, @r###"
-    {
-      "vectorStore": true,
-      "metrics": false,
-      "logsRoute": false,
-      "editDocumentsByFunction": false,
-      "containsFilter": false
-    }
-    "###);
-
-    let (response, code) = index
-        .update_settings(json!({
-            "embedders": {
-                "manual": {
-                    "source": "userProvided",
-                    "dimensions": 3,
-                }
-            },
-        }))
-        .await;
-    snapshot!(code, @"202 Accepted");
-    server.wait_task(response.uid()).await.succeeded();
-
-    let documents = json!([
-        {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
-        {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
-    ]);
-    let (value, code) = index.add_documents(documents, None).await;
-    snapshot!(code, @"202 Accepted");
-    index.wait_task(value.uid()).await.succeeded();
-
-    let (response, code) = index
-        .update_settings(json!({
-            "embedders": {
-                "manual": {
-                    "source": "userProvided",
-                    "dimensions": 3,
-                    "binaryQuantized": true,
-                }
-            },
-        }))
-        .await;
-    snapshot!(code, @"202 Accepted");
-    server.wait_task(response.uid()).await.succeeded();
-
-    // Make sure the documents are binary quantized
-    let (documents, _code) = index
-        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
-        .await;
-    snapshot!(json_string!(documents), @r###"
-    {
-      "results": [
-        {
-          "id": 0,
-          "name": "kefir",
-          "_vectors": {
-            "manual": {
-              "embeddings": [
-                [
-                  -1.0,
-                  -1.0,
-                  1.0
-                ]
-              ],
-              "regenerate": false
-            }
-          }
-        },
-        {
-          "id": 1,
-          "name": "echo",
-          "_vectors": {
-            "manual": {
-              "embeddings": [
-                [
-                  1.0,
-                  1.0,
-                  -1.0
-                ]
-              ],
-              "regenerate": false
-            }
-          }
-        }
-      ],
-      "offset": 0,
-      "limit": 20,
-      "total": 2
-    }
-    "###);
-}
-
-#[actix_rt::test]
-async fn try_to_disable_binary_quantization() {
-    let server = Server::new().await;
-    let index = server.index("doggo");
-    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
-    snapshot!(code, @"200 OK");
-    snapshot!(value, @r###"
-    {
-      "vectorStore": true,
-      "metrics": false,
-      "logsRoute": false,
-      "editDocumentsByFunction": false,
-      "containsFilter": false
-    }
-    "###);
-
-    let (response, code) = index
-        .update_settings(json!({
-            "embedders": {
-                "manual": {
-                    "source": "userProvided",
-                    "dimensions": 3,
-                    "binaryQuantized": true,
-                }
-            },
-        }))
-        .await;
-    snapshot!(code, @"202 Accepted");
-    server.wait_task(response.uid()).await.succeeded();
-
-    let (response, code) = index
-        .update_settings(json!({
-            "embedders": {
-                "manual": {
-                    "source": "userProvided",
-                    "dimensions": 3,
-                    "binaryQuantized": false,
-                }
-            },
-        }))
-        .await;
-    snapshot!(code, @"202 Accepted");
-    let ret = server.wait_task(response.uid()).await;
-    snapshot!(ret, @r###"
-    {
-      "uid": "[uid]",
-      "indexUid": "doggo",
-      "status": "failed",
-      "type": "settingsUpdate",
-      "canceledBy": null,
-      "details": {
-        "embedders": {
-          "manual": {
-            "source": "userProvided",
-            "dimensions": 3,
-            "binaryQuantized": false
-          }
-        }
-      },
-      "error": {
-        "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
-        "code": "invalid_settings_embedders",
-        "type": "invalid_request",
-        "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
-      },
-      "duration": "[duration]",
-      "enqueuedAt": "[date]",
-      "startedAt": "[date]",
-      "finishedAt": "[date]"
-    }
-    "###);
-}
-
-#[actix_rt::test]
-async fn binary_quantize_clear_documents() {
-    let server = Server::new().await;
-    let index = generate_default_user_provided_documents(&server).await;
-
-    let (response, code) = index
-        .update_settings(json!({
-            "embedders": {
-                "manual": {
-                    "binaryQuantized": true,
-                }
-            },
-        }))
-        .await;
-    snapshot!(code, @"202 Accepted");
-    server.wait_task(response.uid()).await.succeeded();
-
-    let (value, _code) = index.clear_all_documents().await;
-    index.wait_task(value.uid()).await.succeeded();
-
-    // Make sure the documents DB has been cleared
-    let (documents, _code) = index
-        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
-        .await;
-    snapshot!(json_string!(documents), @r###"
-    {
-      "results": [],
-      "offset": 0,
-      "limit": 20,
-      "total": 0
-    }
-    "###);
-
-    // Make sure the arroy DB has been cleared
-    let (documents, _code) =
-        index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await;
-    snapshot!(documents, @r###"
-    {
-      "hits": [],
-      "query": "",
-      "processingTimeMs": "[duration]",
-      "limit": 20,
-      "offset": 0,
-      "estimatedTotalHits": 0,
-      "semanticHitCount": 0
-    }
-    "###);
-}
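Note: the deleted file above is the whole binary-quantization test suite. Its pivotal payload is the embedder update below; per the `try_to_disable_binary_quantization` snapshot, setting `binaryQuantized` back to `false` afterwards fails, since quantization is lossy. A sketch assuming only `serde_json`:

```rust
use serde_json::json;

fn main() {
    // One-way switch: accepted once, but the snapshotted task above shows the
    // update failing if `binaryQuantized` is later flipped back to false.
    let enable = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
                "binaryQuantized": true
            }
        }
    });
    println!("{enable}");
}
```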
@@ -1,4 +1,3 @@
-mod binary_quantized;
 mod openai;
 mod rest;
 mod settings;
@@ -625,8 +624,7 @@ async fn clear_documents() {
     "###);

     // Make sure the arroy DB has been cleared
-    let (documents, _code) =
-        index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await;
+    let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
     snapshot!(documents, @r###"
     {
       "hits": [],
@@ -687,11 +685,7 @@ async fn add_remove_one_vector_4588() {
     let task = index.wait_task(value.uid()).await;
     snapshot!(task, name: "document-deleted");

-    let (documents, _code) = index
-        .search_post(
-            json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }),
-        )
-        .await;
+    let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
     snapshot!(documents, @r###"
     {
       "hits": [
@@ -137,14 +137,13 @@ fn long_text() -> &'static str {
 }

 async fn create_mock_tokenized() -> (MockServer, Value) {
-    create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false, false).await
+    create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false).await
 }

 async fn create_mock_with_template(
     document_template: &str,
     model_dimensions: ModelDimensions,
     fallible: bool,
-    slow: bool,
 ) -> (MockServer, Value) {
     let mock_server = MockServer::start().await;
     const API_KEY: &str = "my-api-key";
@@ -155,11 +154,7 @@ async fn create_mock_with_template(
     Mock::given(method("POST"))
         .and(path("/"))
         .respond_with(move |req: &Request| {
-            // 0. wait for a long time
-            if slow {
-                std::thread::sleep(std::time::Duration::from_secs(1));
-            }
-            // 1. maybe return 500
+            // 0. maybe return 500
             if fallible {
                 let attempt = attempt.fetch_add(1, Ordering::Relaxed);
                 let failed = matches!(attempt % 4, 0 | 1 | 3);
@@ -172,7 +167,7 @@ async fn create_mock_with_template(
                     }))
                 }
             }
-            // 3. check API key
+            // 1. check API key
             match req.headers.get("Authorization") {
                 Some(api_key) if api_key == API_KEY_BEARER => {
                     {}
@@ -207,7 +202,7 @@ async fn create_mock_with_template(
                     )
                 }
             }
-            // 3. parse text inputs
+            // 2. parse text inputs
             let query: serde_json::Value = match req.body_json() {
                 Ok(query) => query,
                 Err(_error) => return ResponseTemplate::new(400).set_body_json(
@@ -228,7 +223,7 @@ async fn create_mock_with_template(
                 panic!("Expected {model_dimensions:?}, got {query_model_dimensions:?}")
             }

-            // 4. for each text, find embedding in responses
+            // 3. for each text, find embedding in responses
             let serde_json::Value::Array(inputs) = &query["input"] else {
                 panic!("Unexpected `input` value")
             };
@@ -288,7 +283,7 @@ async fn create_mock_with_template(
                 "embedding": embedding,
             })).collect();

-            // 5. produce output from embeddings
+            // 4. produce output from embeddings
             ResponseTemplate::new(200).set_body_json(json!({
                 "object": "list",
                 "data": data,
@@ -322,27 +317,23 @@ const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée
 {%- endif %}, de race {{doc.breed}}."#;

 async fn create_mock() -> (MockServer, Value) {
-    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false, false).await
+    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false).await
 }

 async fn create_mock_dimensions() -> (MockServer, Value) {
-    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false, false).await
+    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false).await
 }

 async fn create_mock_small_embedding_model() -> (MockServer, Value) {
-    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false, false).await
+    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false).await
 }

 async fn create_mock_legacy_embedding_model() -> (MockServer, Value) {
-    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false, false).await
+    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false).await
 }

 async fn create_fallible_mock() -> (MockServer, Value) {
-    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, false).await
-}
-
-async fn create_slow_mock() -> (MockServer, Value) {
-    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, true).await
+    create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true).await
 }

 // basic test "it works"
@@ -458,7 +449,7 @@ async fn it_works() {
|
|||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({
|
.search_post(json!({
|
||||||
"q": "chien de chasse",
|
"q": "chien de chasse",
|
||||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"},
|
"hybrid": {"semanticRatio": 1.0}
|
||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
@@ -498,7 +489,7 @@ async fn it_works() {
|
|||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({
|
.search_post(json!({
|
||||||
"q": "petit chien",
|
"q": "petit chien",
|
||||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
"hybrid": {"semanticRatio": 1.0}
|
||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
@@ -538,7 +529,7 @@ async fn it_works() {
|
|||||||
let (response, code) = index
|
     let (response, code) = index
         .search_post(json!({
             "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -625,7 +616,7 @@ async fn tokenize_long_text() {
             "q": "grand chien de berger des montagnes",
             "showRankingScore": true,
             "attributesToRetrieve": ["id"],
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1073,7 +1064,7 @@ async fn smaller_dimensions() {
     let (response, code) = index
         .search_post(json!({
             "q": "chien de chasse",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1113,7 +1104,7 @@ async fn smaller_dimensions() {
     let (response, code) = index
         .search_post(json!({
             "q": "petit chien",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1153,7 +1144,7 @@ async fn smaller_dimensions() {
     let (response, code) = index
         .search_post(json!({
             "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1304,7 +1295,7 @@ async fn small_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "chien de chasse",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1344,7 +1335,7 @@ async fn small_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "petit chien",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1384,7 +1375,7 @@ async fn small_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1534,7 +1525,7 @@ async fn legacy_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "chien de chasse",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1574,7 +1565,7 @@ async fn legacy_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "petit chien",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1614,7 +1605,7 @@ async fn legacy_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1765,7 +1756,7 @@ async fn it_still_works() {
     let (response, code) = index
         .search_post(json!({
             "q": "chien de chasse",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1805,7 +1796,7 @@ async fn it_still_works() {
     let (response, code) = index
         .search_post(json!({
             "q": "petit chien",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1845,7 +1836,7 @@ async fn it_still_works() {
     let (response, code) = index
         .search_post(json!({
             "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
+            "hybrid": {"semanticRatio": 1.0}
         }))
         .await;
     snapshot!(code, @"200 OK");
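Every hunk above makes the same mechanical change: the `embedder` key is dropped from the `hybrid` object, apparently restoring the earlier request shape in which the lone configured embedder is implied rather than named. A hedged sketch of the two payload shapes, built with serde_json purely for illustration:

use serde_json::json;

fn hybrid_payloads() -> (serde_json::Value, serde_json::Value) {
    // v1.11.3 side: the embedder must be named explicitly.
    let explicit = json!({
        "q": "petit chien",
        "hybrid": { "semanticRatio": 1.0, "embedder": "default" },
    });
    // reverted side: the single configured embedder is implied.
    let implicit = json!({
        "q": "petit chien",
        "hybrid": { "semanticRatio": 1.0 },
    });
    (explicit, implicit)
}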
@@ -1882,114 +1873,4 @@ async fn it_still_works() {
     ]
     "###);
 }
-
-// test with a server that responds 500 on 3 out of 4 calls
-#[actix_rt::test]
-async fn timeout() {
-    let (_mock, setting) = create_slow_mock().await;
-    let server = get_server_vector().await;
-    let index = server.index("doggo");
-
-    let (response, code) = index
-        .update_settings(json!({
-            "embedders": {
-                "default": setting,
-            },
-        }))
-        .await;
-    snapshot!(code, @"202 Accepted");
-    let task = server.wait_task(response.uid()).await;
-    snapshot!(task["status"], @r###""succeeded""###);
-    let documents = json!([
-        {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
-    ]);
-    let (value, code) = index.add_documents(documents, None).await;
-    snapshot!(code, @"202 Accepted");
-    let task = index.wait_task(value.uid()).await;
-    snapshot!(task, @r###"
-    {
-      "uid": "[uid]",
-      "indexUid": "doggo",
-      "status": "succeeded",
-      "type": "documentAdditionOrUpdate",
-      "canceledBy": null,
-      "details": {
-        "receivedDocuments": 1,
-        "indexedDocuments": 1
-      },
-      "error": null,
-      "duration": "[duration]",
-      "enqueuedAt": "[date]",
-      "startedAt": "[date]",
-      "finishedAt": "[date]"
-    }
-    "###);
-
-    let (documents, _code) = index
-        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
-        .await;
-    snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###"
-    {
-      "results": [
-        {
-          "id": 0,
-          "name": "kefir",
-          "gender": "M",
-          "birthyear": 2023,
-          "breed": "Patou",
-          "_vectors": {
-            "default": {
-              "embeddings": "[vector]",
-              "regenerate": true
-            }
-          }
-        }
-      ],
-      "offset": 0,
-      "limit": 20,
-      "total": 1
-    }
-    "###);
-
-    let (response, code) = index
-        .search_post(json!({
-            "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 0.99, "embedder": "default"}
-        }))
-        .await;
-    snapshot!(code, @"200 OK");
-    snapshot!(json_string!(response["semanticHitCount"]), @"0");
-    snapshot!(json_string!(response["hits"]), @"[]");
-
-    let (response, code) = index
-        .search_post(json!({
-            "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 0.99, "embedder": "default"}
-        }))
-        .await;
-    snapshot!(code, @"200 OK");
-    snapshot!(json_string!(response["semanticHitCount"]), @"1");
-    snapshot!(json_string!(response["hits"]), @r###"
-    [
-      {
-        "id": 0,
-        "name": "kefir",
-        "gender": "M",
-        "birthyear": 2023,
-        "breed": "Patou"
-      }
-    ]
-    "###);
-
-    let (response, code) = index
-        .search_post(json!({
-            "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 0.99, "embedder": "default"}
-        }))
-        .await;
-    snapshot!(code, @"200 OK");
-    snapshot!(json_string!(response["semanticHitCount"]), @"0");
-    snapshot!(json_string!(response["hits"]), @"[]");
-}
-
 // test with a server that wrongly responds 400
@@ -4,53 +4,6 @@ use crate::common::{GetAllDocumentsOptions, Server};
 use crate::json;
 use crate::vector::generate_default_user_provided_documents;

-#[actix_rt::test]
-async fn field_unavailable_for_source() {
-    let server = Server::new().await;
-    let index = server.index("doggo");
-    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
-    snapshot!(code, @"200 OK");
-    snapshot!(value, @r###"
-    {
-      "vectorStore": true,
-      "metrics": false,
-      "logsRoute": false,
-      "editDocumentsByFunction": false,
-      "containsFilter": false
-    }
-    "###);
-
-    let (response, code) = index
-        .update_settings(json!({
-          "embedders": { "manual": {"source": "userProvided", "documentTemplate": "{{doc.documentTemplate}}"}},
-        }))
-        .await;
-    snapshot!(code, @"400 Bad Request");
-    snapshot!(response, @r###"
-    {
-      "message": "`.embedders.manual`: Field `documentTemplate` unavailable for source `userProvided` (only available for sources: `huggingFace`, `openAi`, `ollama`, `rest`). Available fields: `source`, `dimensions`, `distribution`, `binaryQuantized`",
-      "code": "invalid_settings_embedders",
-      "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
-    }
-    "###);
-
-    let (response, code) = index
-        .update_settings(json!({
-          "embedders": { "default": {"source": "openAi", "revision": "42"}},
-        }))
-        .await;
-    snapshot!(code, @"400 Bad Request");
-    snapshot!(response, @r###"
-    {
-      "message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `dimensions`, `distribution`, `url`, `binaryQuantized`",
-      "code": "invalid_settings_embedders",
-      "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
-    }
-    "###);
-}
-
 #[actix_rt::test]
 async fn update_embedder() {
     let server = Server::new().await;
@@ -265,8 +218,7 @@ async fn reset_embedder_documents() {
     "###);

     // Make sure the arroy DB has been cleared
-    let (documents, _code) =
-        index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await;
+    let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
     snapshot!(json_string!(documents), @r###"
     {
       "message": "Cannot find embedder with name `default`.",
@@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.9.1"
 bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
 byteorder = "1.5.0"
-charabia = { version = "0.9.1", default-features = false }
+charabia = { version = "0.9.0", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.13"
 deserr = "0.6.2"
@@ -79,8 +79,8 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
 ] }
 tiktoken-rs = "0.5.9"
 liquid = "0.26.6"
-rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
-arroy = "0.5.0"
+rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
+arroy = "0.4.0"
 rand = "0.8.5"
 tracing = "0.1.40"
 ureq = { version = "2.10.0", features = ["json"] }
@@ -98,7 +98,14 @@ rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
 all-tokenizations = [
-    "charabia/default",
+    "charabia/chinese",
+    "charabia/hebrew",
+    "charabia/japanese",
+    "charabia/thai",
+    "charabia/korean",
+    "charabia/greek",
+    "charabia/khmer",
+    "charabia/vietnamese",
 ]

 # Use POSIX semaphores instead of SysV semaphores in LMDB
@@ -131,14 +138,8 @@ khmer = ["charabia/khmer"]
 # allow vietnamese specialized tokenization
 vietnamese = ["charabia/vietnamese"]

-# allow german specialized tokenization
-german = ["charabia/german-segmentation"]
-
 # force swedish character recomposition
 swedish-recomposition = ["charabia/swedish-recomposition"]

-# allow turkish specialized tokenization
-turkish = ["charabia/turkish"]
-
 # allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
 cuda = ["candle-core/cuda"]
@@ -258,10 +258,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
     },
     #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
     InvalidSettingsDimensions { embedder_name: String },
-    #[error(
-        "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors."
-    )]
-    InvalidDisableBinaryQuantization { embedder_name: String },
     #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")]
     InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String },
     #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
@@ -297,7 +293,6 @@ impl From<arroy::Error> for Error {
             arroy::Error::InvalidVecDimension { expected, received } => {
                 Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
             }
-            arroy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation),
             arroy::Error::DatabaseFull
             | arroy::Error::InvalidItemAppend
             | arroy::Error::UnmatchingDistance { .. }
@@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
 use crate::order_by_map::OrderByMap;
 use crate::proximity::ProximityPrecision;
 use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
-use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
+use crate::vector::{Embedding, EmbeddingConfig};
 use crate::{
     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
     FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@@ -162,7 +162,7 @@ pub struct Index {
     /// Maps an embedder name to its id in the arroy store.
     pub embedder_category_id: Database<Str, U8>,
     /// Vector store based on arroy™.
-    pub vector_arroy: arroy::Database<Unspecified>,
+    pub vector_arroy: arroy::Database<arroy::distances::Angular>,

     /// Maps the document id to the document as an obkv store.
     pub(crate) documents: Database<BEU32, ObkvCodec>,
@@ -1610,6 +1610,22 @@ impl Index {
             .unwrap_or_default())
     }

+    pub fn arroy_readers<'a>(
+        &'a self,
+        rtxn: &'a RoTxn<'a>,
+        embedder_id: u8,
+    ) -> impl Iterator<Item = Result<arroy::Reader<'a, arroy::distances::Angular>>> + 'a {
+        crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
+            arroy::Reader::open(rtxn, k, self.vector_arroy)
+                .map(Some)
+                .or_else(|e| match e {
+                    arroy::Error::MissingMetadata(_) => Ok(None),
+                    e => Err(e.into()),
+                })
+                .transpose()
+        })
+    }
+
     pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
         self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
     }
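The restored `arroy_readers` helper walks every arroy tree belonging to one embedder. The key scheme it relies on is not shown in this diff; a minimal sketch of what `arroy_db_range_for_embedder` is assumed to do, matching the `(embedder_id as u16) << 8` arithmetic visible in the next hunk, would be:

// Assumption: each embedder owns a block of 256 consecutive u16 database
// indexes starting at embedder_id * 256. Sketch only, not milli's code.
fn arroy_db_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
    let base = (embedder_id as u16) << 8; // embedder 0 -> 0x0000, embedder 1 -> 0x0100, ...
    base..=(base | u8::MAX as u16) // 256 slots per embedder
}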
@@ -1628,13 +1644,32 @@ impl Index {
         docid: DocumentId,
     ) -> Result<BTreeMap<String, Vec<Embedding>>> {
         let mut res = BTreeMap::new();
-        let embedding_configs = self.embedding_configs(rtxn)?;
-        for config in embedding_configs {
-            let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
-            let reader =
-                ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
-            let embeddings = reader.item_vectors(rtxn, docid)?;
-            res.insert(config.name.to_owned(), embeddings);
+        for row in self.embedder_category_id.iter(rtxn)? {
+            let (embedder_name, embedder_id) = row?;
+            let embedder_id = (embedder_id as u16) << 8;
+            let mut embeddings = Vec::new();
+            'vectors: for i in 0..=u8::MAX {
+                let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
+                    .map(Some)
+                    .or_else(|e| match e {
+                        arroy::Error::MissingMetadata(_) => Ok(None),
+                        e => Err(e),
+                    })
+                    .transpose();
+
+                let Some(reader) = reader else {
+                    break 'vectors;
+                };
+
+                let embedding = reader?.item_vector(rtxn, docid)?;
+                if let Some(embedding) = embedding {
+                    embeddings.push(embedding)
+                } else {
+                    break 'vectors;
+                }
+            }
+
+            res.insert(embedder_name.to_owned(), embeddings);
         }
         Ok(res)
     }
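The `.map(Some).or_else(..).transpose()` chain above turns one expected failure, `MissingMetadata`, into loop termination while still propagating every other error. The same pattern in isolation, with a caller-supplied predicate so the sketch stays library-agnostic:

// None means "the packed range is exhausted, stop iterating";
// Some(Err(_)) still surfaces real failures to the caller.
fn until_missing<T, E>(
    res: Result<T, E>,
    is_missing: impl Fn(&E) -> bool,
) -> Option<Result<T, E>> {
    match res {
        Ok(value) => Some(Ok(value)),
        Err(e) if is_missing(&e) => None, // expected: no more trees
        Err(e) => Some(Err(e)),
    }
}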
@@ -190,7 +190,7 @@ impl<'a> Search<'a> {
             return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
         };
         // no embedder, no semantic search
-        let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else {
+        let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
             return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
         };

@@ -201,9 +201,7 @@ impl<'a> Search<'a> {
                 let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
                 let _entered = span.enter();

-                let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
-
-                match embedder.embed_one(query, Some(deadline)) {
+                match embedder.embed_one(query) {
                     Ok(embedding) => embedding,
                     Err(error) => {
                         tracing::error!(error=%error, "Embedding failed");
@@ -214,7 +212,7 @@ impl<'a> Search<'a> {
         };

         search.semantic =
-            Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized });
+            Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder });

         // TODO: would be better to have two distinct functions at this point
         let vector_results = search.execute()?;
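Note that the v1.11.3 side passes a three-second deadline into `embed_one`, while the reverted side calls it unbounded, so a stalled remote embedder can hold up hybrid search for as long as the HTTP client allows. The deadline itself is plain std arithmetic:

use std::time::{Duration, Instant};

// A deadline is just "now + budget"; the callee compares Instant::now()
// against it and gives up once the instant has passed.
fn embedding_deadline() -> Instant {
    Instant::now() + Duration::from_secs(3)
}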
@@ -32,7 +32,6 @@ pub struct SemanticSearch {
     vector: Option<Vec<f32>>,
     embedder_name: String,
     embedder: Arc<Embedder>,
-    quantized: bool,
 }

 pub struct Search<'a> {
@@ -90,10 +89,9 @@ impl<'a> Search<'a> {
         &mut self,
         embedder_name: String,
         embedder: Arc<Embedder>,
-        quantized: bool,
         vector: Option<Vec<f32>>,
     ) -> &mut Search<'a> {
-        self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector });
+        self.semantic = Some(SemanticSearch { embedder_name, embedder, vector });
         self
     }

@@ -208,7 +206,7 @@ impl<'a> Search<'a> {
             degraded,
             used_negative_operator,
         } = match self.semantic.as_ref() {
-            Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => {
+            Some(SemanticSearch { vector: Some(vector), embedder_name, embedder }) => {
                 execute_vector_search(
                     &mut ctx,
                     vector,
@@ -221,7 +219,6 @@ impl<'a> Search<'a> {
                     self.limit,
                     embedder_name,
                     embedder,
-                    *quantized,
                     self.time_budget.clone(),
                     self.ranking_score_threshold,
                 )?
@@ -312,7 +312,6 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
     Ok(ranking_rules)
 }

-#[allow(clippy::too_many_arguments)]
 fn get_ranking_rules_for_vector<'ctx>(
     ctx: &SearchContext<'ctx>,
     sort_criteria: &Option<Vec<AscDesc>>,
@@ -321,7 +320,6 @@ fn get_ranking_rules_for_vector<'ctx>(
     target: &[f32],
     embedder_name: &str,
     embedder: &Embedder,
-    quantized: bool,
 ) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
     // query graph search

@@ -349,7 +347,6 @@ fn get_ranking_rules_for_vector<'ctx>(
             limit_plus_offset,
             embedder_name,
             embedder,
-            quantized,
         )?;
         ranking_rules.push(Box::new(vector_sort));
         vector = true;
@@ -579,7 +576,6 @@ pub fn execute_vector_search(
     length: usize,
     embedder_name: &str,
     embedder: &Embedder,
-    quantized: bool,
     time_budget: TimeBudget,
     ranking_score_threshold: Option<f64>,
 ) -> Result<PartialSearchResult> {
@@ -595,7 +591,6 @@ pub fn execute_vector_search(
         vector,
         embedder_name,
         embedder,
-        quantized,
     )?;

     let mut placeholder_search_logger = logger::DefaultSearchLogger;
@@ -1,10 +1,11 @@
 use std::iter::FromIterator;

+use ordered_float::OrderedFloat;
 use roaring::RoaringBitmap;

 use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
 use crate::score_details::{self, ScoreDetails};
-use crate::vector::{ArroyWrapper, DistributionShift, Embedder};
+use crate::vector::{DistributionShift, Embedder};
 use crate::{DocumentId, Result, SearchContext, SearchLogger};

 pub struct VectorSort<Q: RankingRuleQueryTrait> {
@@ -15,7 +16,6 @@ pub struct VectorSort<Q: RankingRuleQueryTrait> {
     limit: usize,
     distribution_shift: Option<DistributionShift>,
     embedder_index: u8,
-    quantized: bool,
 }

 impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
@@ -26,7 +26,6 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
         limit: usize,
         embedder_name: &str,
         embedder: &Embedder,
-        quantized: bool,
     ) -> Result<Self> {
         let embedder_index = ctx
             .index
@@ -42,7 +41,6 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
             limit,
             distribution_shift: embedder.distribution(),
             embedder_index,
-            quantized,
         })
     }

@@ -51,10 +49,19 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
         ctx: &mut SearchContext<'_>,
         vector_candidates: &RoaringBitmap,
     ) -> Result<()> {
-        let target = &self.target;
+        let readers: std::result::Result<Vec<_>, _> =
+            ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect();
+        let readers = readers?;

-        let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized);
-        let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
+        let target = &self.target;
+        let mut results = Vec::new();
+
+        for reader in readers.iter() {
+            let nns_by_vector =
+                reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?;
+            results.extend(nns_by_vector.into_iter());
+        }
+        results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
         self.cached_sorted_docids = results.into_iter();

         Ok(())
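With the single `ArroyWrapper` query replaced by one query per tree, `fill_buffer` must merge the per-reader results itself: each reader yields its own `(docid, distance)` pairs, and a global sort by distance restores a single ranking. The merge step reduced to its essentials, sketched over plain vectors rather than the milli types:

use ordered_float::OrderedFloat;

// Concatenate per-tree k-NN results, then sort by distance; OrderedFloat
// supplies the total order that raw f32 lacks.
fn merge_nns(per_reader: Vec<Vec<(u32, f32)>>) -> Vec<(u32, f32)> {
    let mut all: Vec<(u32, f32)> = per_reader.into_iter().flatten().collect();
    all.sort_unstable_by_key(|&(_, distance)| OrderedFloat(distance));
    all
}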
@@ -1,9 +1,10 @@
 use std::sync::Arc;

+use ordered_float::OrderedFloat;
 use roaring::RoaringBitmap;

 use crate::score_details::{self, ScoreDetails};
-use crate::vector::{ArroyWrapper, Embedder};
+use crate::vector::Embedder;
 use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult};

 pub struct Similar<'a> {
@@ -17,11 +18,9 @@ pub struct Similar<'a> {
     embedder_name: String,
     embedder: Arc<Embedder>,
     ranking_score_threshold: Option<f64>,
-    quantized: bool,
 }

 impl<'a> Similar<'a> {
-    #[allow(clippy::too_many_arguments)]
     pub fn new(
         id: DocumentId,
         offset: usize,
@@ -30,7 +29,6 @@ impl<'a> Similar<'a> {
         rtxn: &'a heed::RoTxn<'a>,
         embedder_name: String,
         embedder: Arc<Embedder>,
-        quantized: bool,
     ) -> Self {
         Self {
             id,
@@ -42,7 +40,6 @@ impl<'a> Similar<'a> {
             embedder_name,
             embedder,
             ranking_score_threshold: None,
-            quantized,
         }
     }

@@ -70,13 +67,29 @@ impl<'a> Similar<'a> {
             .get(self.rtxn, &self.embedder_name)?
             .ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;

-        let reader = ArroyWrapper::new(self.index.vector_arroy, embedder_index, self.quantized);
-        let results = reader.nns_by_item(
-            self.rtxn,
-            self.id,
-            self.limit + self.offset + 1,
-            Some(&universe),
-        )?;
+        let readers: std::result::Result<Vec<_>, _> =
+            self.index.arroy_readers(self.rtxn, embedder_index).collect();
+
+        let readers = readers?;
+
+        let mut results = Vec::new();
+
+        for reader in readers.iter() {
+            let nns_by_item = reader.nns_by_item(
+                self.rtxn,
+                self.id,
+                self.limit + self.offset + 1,
+                None,
+                Some(&universe),
+            )?;
+            if let Some(mut nns_by_item) = nns_by_item {
+                results.append(&mut nns_by_item);
+            } else {
+                break;
+            }
+        }
+
+        results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
+
         let mut documents_ids = Vec::with_capacity(self.limit);
         let mut document_scores = Vec::with_capacity(self.limit);
@@ -20,7 +20,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::settings::InnerIndexSettingsDiff;
 use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
 use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
-use crate::vector::settings::ReindexAction;
+use crate::vector::settings::{EmbedderAction, ReindexAction};
 use crate::vector::{Embedder, Embeddings};
 use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};

@@ -208,9 +208,10 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(

     if reindex_vectors {
         for (name, action) in settings_diff.embedding_config_updates.iter() {
-            if let Some(action) = action.reindex() {
-                let Some((embedder_name, (embedder, prompt, _quantized))) =
-                    configs.remove_entry(name)
+            match action {
+                EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted
+                EmbedderAction::Reindex(action) => {
+                    let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name)
                     else {
                         tracing::error!(embedder = name, "Requested embedder config not found");
                         continue;
@@ -240,7 +241,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                     let action = match action {
                         ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
                         ReindexAction::RegeneratePrompts => {
-                            let Some((_, old_prompt, _quantized)) = old_configs.get(name) else {
+                            let Some((_, old_prompt)) = old_configs.get(name) else {
                                 tracing::error!(embedder = name, "Old embedder config not found");
                                 continue;
                             };
@@ -259,14 +260,13 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                         add_to_user_provided: RoaringBitmap::new(),
                         action,
                     });
-            } else {
-                continue;
+                }
             }
         }
     } else {
         // document operation

-        for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() {
+        for (embedder_name, (embedder, prompt)) in configs.into_iter() {
             // (docid, _index) -> KvWriterDelAdd -> Vector
             let manual_vectors_writer = create_writer(
                 indexer.chunk_compression_type,
@@ -34,7 +34,7 @@ use crate::index::IndexEmbeddingConfig;
 use crate::update::settings::InnerIndexSettingsDiff;
 use crate::vector::error::PossibleEmbeddingMistakes;
 use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};

+/// Hello!
 /// Extract data for each databases from obkv documents in parallel.
 /// Send data in grenad file over provided Sender.
 #[allow(clippy::too_many_arguments)]
@@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs;
 use crate::update::{
     IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
 };
-use crate::vector::{ArroyWrapper, EmbeddingConfigs};
+use crate::vector::EmbeddingConfigs;
 use crate::{CboRoaringBitmapCodec, Index, Object, Result};

 static MERGED_DATABASE_COUNT: usize = 7;
@@ -679,42 +679,23 @@ where
         let number_of_documents = self.index.number_of_documents(self.wtxn)?;
         let mut rng = rand::rngs::StdRng::seed_from_u64(42);

-        // If an embedder wasn't used in the typedchunk but must be binary quantized
-        // we should insert it in `dimension`
-        for (name, action) in settings_diff.embedding_config_updates.iter() {
-            if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
-                let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or(
-                    InternalError::DatabaseMissingEntry {
-                        db_name: "embedder_category_id",
-                        key: None,
-                    },
-                )?;
-                let reader =
-                    ArroyWrapper::new(self.index.vector_arroy, index, action.was_quantized);
-                let dim = reader.dimensions(self.wtxn)?;
-                dimension.insert(name.to_string(), dim);
-            }
-        }
-
         for (embedder_name, dimension) in dimension {
             let wtxn = &mut *self.wtxn;
             let vector_arroy = self.index.vector_arroy;
-            let cancel = &self.should_abort;

             let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
                 InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
             )?;
-            let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name);
-            let was_quantized = settings_diff
-                .old
-                .embedding_configs
-                .get(&embedder_name)
-                .map_or(false, |conf| conf.2);
-            let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized);

             pool.install(|| {
-                let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
-                writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing, cancel)?;
+                for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
+                    let writer = arroy::Writer::new(vector_arroy, k, dimension);
+                    if writer.need_build(wtxn)? {
+                        writer.build(wtxn, &mut rng, None)?;
+                    } else if writer.is_empty(wtxn)? {
+                        break;
+                    }
+                }
                 Result::Ok(())
             })
             .map_err(InternalError::from)??;
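The restored build loop stops at the first empty tree instead of visiting all 256 indexes, leaning on the invariant, stated explicitly in the typed-chunk hunks further down, that vectors are packed into the lowest arroy indexes first. Modeled on a hypothetical `Tree` type rather than the arroy API:

// Hypothetical stand-in: `needs_build` and `is_empty` mimic the arroy 0.4
// Writer queries used in the hunk above.
struct Tree {
    needs_build: bool,
    is_empty: bool,
}

fn build_packed(trees: &[Tree]) {
    for tree in trees {
        if tree.needs_build {
            // build this tree
        } else if tree.is_empty {
            break; // packed invariant: no occupied tree follows an empty one
        }
    }
}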
@@ -2765,7 +2746,6 @@ mod tests {
                     response: Setting::NotSet,
                     distribution: Setting::NotSet,
                     headers: Setting::NotSet,
-                    binary_quantized: Setting::NotSet,
                 }),
             );
             settings.set_embedder_settings(embedders);
@@ -2794,7 +2774,7 @@ mod tests {
             std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
         let res = index
             .search(&rtxn)
-            .semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))
+            .semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec()))
             .execute()
             .unwrap();
         assert_eq!(res.documents_ids.len(), 3);
@@ -28,8 +28,7 @@ use crate::update::index_documents::GrenadParameters;
 use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
 use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
-use crate::vector::settings::WriteBackToDocuments;
-use crate::vector::ArroyWrapper;
+use crate::vector::settings::{EmbedderAction, WriteBackToDocuments};
 use crate::{
     is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
 };
@@ -990,24 +989,29 @@ impl<'a, 'i> Transform<'a, 'i> {
             None
         };

-        let readers: BTreeMap<&str, (ArroyWrapper, &RoaringBitmap)> = settings_diff
+        let readers: Result<
+            BTreeMap<&str, (Vec<arroy::Reader<'_, arroy::distances::Angular>>, &RoaringBitmap)>,
+        > = settings_diff
             .embedding_config_updates
             .iter()
             .filter_map(|(name, action)| {
-                if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
-                    action.write_back()
+                if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
+                    embedder_id,
+                    user_provided,
+                }) = action
                 {
-                    let reader = ArroyWrapper::new(
-                        self.index.vector_arroy,
-                        *embedder_id,
-                        action.was_quantized,
-                    );
-                    Some((name.as_str(), (reader, user_provided)))
+                    let readers: Result<Vec<_>> =
+                        self.index.arroy_readers(wtxn, *embedder_id).collect();
+                    match readers {
+                        Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))),
+                        Err(error) => Some(Err(error)),
+                    }
                 } else {
                     None
                 }
             })
             .collect();
+        let readers = readers?;

         let old_vectors_fid = settings_diff
             .old
@@ -1045,24 +1049,34 @@ impl<'a, 'i> Transform<'a, 'i> {
                 arroy::Error,
             > = readers
                 .iter()
-                .filter_map(|(name, (reader, user_provided))| {
+                .filter_map(|(name, (readers, user_provided))| {
                     if !user_provided.contains(docid) {
                         return None;
                     }
-                    match reader.item_vectors(wtxn, docid) {
-                        Ok(vectors) if vectors.is_empty() => None,
-                        Ok(vectors) => Some(Ok((
+                    let mut vectors = Vec::new();
+                    for reader in readers {
+                        let Some(vector) = reader.item_vector(wtxn, docid).transpose() else {
+                            break;
+                        };
+
+                        match vector {
+                            Ok(vector) => vectors.push(vector),
+                            Err(error) => return Some(Err(error)),
+                        }
+                    }
+                    if vectors.is_empty() {
+                        return None;
+                    }
+                    Some(Ok((
                         name.to_string(),
                         serde_json::to_value(ExplicitVectors {
-                            embeddings: Some(
-                                VectorOrArrayOfVectors::from_array_of_vectors(vectors),
-                            ),
+                            embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
+                                vectors,
+                            )),
                             regenerate: false,
                         })
                         .unwrap(),
-                    ))),
-                        Err(e) => Some(Err(e)),
-                    }
+                    )))
                 })
                 .collect();

@@ -1090,10 +1104,21 @@ impl<'a, 'i> Transform<'a, 'i> {
             }
         }

+        let mut writers = Vec::new();
+
         // delete all vectors from the embedders that need removal
-        for (_, (reader, _)) in readers {
-            let dimensions = reader.dimensions(wtxn)?;
-            reader.clear(wtxn, dimensions)?;
+        for (_, (readers, _)) in readers {
+            for reader in readers {
+                let dimensions = reader.dimensions();
+                let arroy_index = reader.index();
+                drop(reader);
+                let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions);
+                writers.push(writer);
+            }
+        }
+
+        for writer in writers {
+            writer.clear(wtxn)?;
         }

         let grenad_params = GrenadParameters {
@@ -27,7 +27,6 @@ use crate::update::index_documents::helpers::{
     as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
 };
 use crate::update::settings::InnerIndexSettingsDiff;
-use crate::vector::ArroyWrapper;
 use crate::{
     lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
     Result, SerializationError, U8StrStrCodec,
@@ -667,20 +666,23 @@ pub(crate) fn write_typed_chunk_into_index(
             let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
                 InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
             )?;
-            let binary_quantized = settings_diff
-                .old
-                .embedding_configs
-                .get(&embedder_name)
-                .map_or(false, |conf| conf.2);
             // FIXME: allow customizing distance
-            let writer = ArroyWrapper::new(index.vector_arroy, embedder_index, binary_quantized);
+            let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
+                .map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension))
+                .collect();

             // remove vectors for docids we want them removed
             let merger = remove_vectors_builder.build();
             let mut iter = merger.into_stream_merger_iter()?;
             while let Some((key, _)) = iter.next()? {
                 let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
-                writer.del_items(wtxn, expected_dimension, docid)?;
+
+                for writer in &writers {
+                    // Uses invariant: vectors are packed in the first writers.
+                    if !writer.del_item(wtxn, docid)? {
+                        break;
+                    }
+                }
             }

             // add generated embeddings
@@ -708,7 +710,9 @@ pub(crate) fn write_typed_chunk_into_index(
                         embeddings.embedding_count(),
                     )));
                 }
-                writer.add_items(wtxn, docid, &embeddings)?;
+                for (embedding, writer) in embeddings.iter().zip(&writers) {
+                    writer.add_item(wtxn, docid, embedding)?;
+                }
             }

             // perform the manual diff
@@ -723,14 +727,46 @@ pub(crate) fn write_typed_chunk_into_index(
                 if let Some(value) = vector_deladd_obkv.get(DelAdd::Deletion) {
                     let vector: Vec<f32> = pod_collect_to_vec(value);

-                    writer.del_item(wtxn, docid, &vector)?;
+                    let mut deleted_index = None;
+                    for (index, writer) in writers.iter().enumerate() {
+                        let Some(candidate) = writer.item_vector(wtxn, docid)? else {
+                            // uses invariant: vectors are packed in the first writers.
+                            break;
+                        };
+                        if candidate == vector {
+                            writer.del_item(wtxn, docid)?;
+                            deleted_index = Some(index);
+                        }
+                    }
+
+                    // 🥲 enforce invariant: vectors are packed in the first writers.
+                    if let Some(deleted_index) = deleted_index {
+                        let mut last_index_with_a_vector = None;
+                        for (index, writer) in writers.iter().enumerate().skip(deleted_index) {
+                            let Some(candidate) = writer.item_vector(wtxn, docid)? else {
+                                break;
+                            };
+                            last_index_with_a_vector = Some((index, candidate));
+                        }
+                        if let Some((last_index, vector)) = last_index_with_a_vector {
+                            // unwrap: computed the index from the list of writers
+                            let writer = writers.get(last_index).unwrap();
+                            writer.del_item(wtxn, docid)?;
+                            writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?;
+                        }
+                    }
                 }

                 if let Some(value) = vector_deladd_obkv.get(DelAdd::Addition) {
                     let vector = pod_collect_to_vec(value);

                     // overflow was detected during vector extraction.
-                    writer.add_item(wtxn, docid, &vector)?;
+                    for writer in &writers {
+                        if !writer.contains_item(wtxn, docid)? {
+                            writer.add_item(wtxn, docid, &vector)?;
+                            break;
+                        }
+                    }
                 }
             }
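The deletion path above repairs the packing invariant by hand: removing a vector from slot i can leave a hole, so the last occupied slot is moved down into it. The same repair on a plain `Vec<Option<V>>`, as a sketch:

// After slots[deleted] has been emptied, move the last occupied slot down
// into the hole so occupied slots stay contiguous at the front.
fn repack<V>(slots: &mut [Option<V>], deleted: usize) {
    if let Some(last) = slots.iter().rposition(Option::is_some) {
        if last > deleted {
            slots[deleted] = slots[last].take();
        }
    }
}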
@@ -954,7 +954,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         let old_configs = self.index.embedding_configs(self.wtxn)?;
         let remove_all: Result<BTreeMap<String, EmbedderAction>> = old_configs
             .into_iter()
-            .map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> {
+            .map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> {
                 let embedder_id =
                     self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or(
                         crate::InternalError::DatabaseMissingEntry {
@@ -964,10 +964,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                     )?;
                 Ok((
                     name,
-                    EmbedderAction::with_write_back(
-                        WriteBackToDocuments { embedder_id, user_provided },
-                        config.quantized(),
-                    ),
+                    EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
+                        embedder_id,
+                        user_provided,
+                    }),
                 ))
             })
             .collect();
@@ -1004,8 +1004,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
             match joined {
                 // updated config
                 EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => {
-                    let was_quantized = old.binary_quantized.set().unwrap_or_default();
-                    let settings_diff = SettingsDiff::from_settings(&name, old, new)?;
+                    let settings_diff = SettingsDiff::from_settings(old, new);
                     match settings_diff {
                         SettingsDiff::Remove => {
                             tracing::debug!(
@@ -1024,29 +1023,25 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                             self.index.embedder_category_id.delete(self.wtxn, &name)?;
                             embedder_actions.insert(
                                 name,
-                                EmbedderAction::with_write_back(
-                                    WriteBackToDocuments { embedder_id, user_provided },
-                                    was_quantized,
-                                ),
+                                EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
+                                    embedder_id,
+                                    user_provided,
+                                }),
                             );
                         }
-                        SettingsDiff::Reindex { action, updated_settings, quantize } => {
+                        SettingsDiff::Reindex { action, updated_settings } => {
                             tracing::debug!(
                                 embedder = name,
                                 user_provided = user_provided.len(),
                                 ?action,
                                 "reindex embedder"
                             );
-                            embedder_actions.insert(
-                                name.clone(),
-                                EmbedderAction::with_reindex(action, was_quantized)
-                                    .with_is_being_quantized(quantize),
-                            );
+                            embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action));
                             let new =
                                 validate_embedding_settings(Setting::Set(updated_settings), &name)?;
                             updated_configs.insert(name, (new, user_provided));
                         }
-                        SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => {
+                        SettingsDiff::UpdateWithoutReindex { updated_settings } => {
                             tracing::debug!(
                                 embedder = name,
                                 user_provided = user_provided.len(),
@@ -1054,12 +1049,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                             );
                             let new =
                                 validate_embedding_settings(Setting::Set(updated_settings), &name)?;
-                            if quantize {
-                                embedder_actions.insert(
-                                    name.clone(),
-                                    EmbedderAction::default().with_is_being_quantized(true),
-                                );
-                            }
                             updated_configs.insert(name, (new, user_provided));
                         }
                     }
@@ -1078,10 +1067,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                         &mut setting,
                     );
                     let setting = validate_embedding_settings(setting, &name)?;
-                    embedder_actions.insert(
-                        name.clone(),
-                        EmbedderAction::with_reindex(ReindexAction::FullReindex, false),
-                    );
+                    embedder_actions
+                        .insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex));
                     updated_configs.insert(name, (setting, RoaringBitmap::new()));
                 }
             }
@@ -1095,16 +1082,21 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         let mut find_free_index =
             move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8);
         for (name, action) in embedder_actions.iter() {
-            // ignore actions that are not possible for a new embedder
-            if matches!(action.reindex(), Some(ReindexAction::FullReindex))
-                && self.index.embedder_category_id.get(self.wtxn, name)?.is_none()
-            {
-                let id =
-                    find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
-                tracing::debug!(embedder = name, id, "assigning free id to new embedder");
-                self.index.embedder_category_id.put(self.wtxn, name, &id)?;
+            match action {
+                EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => {
+                    /* cannot be a new embedder, so has to have an id already */
+                }
+                EmbedderAction::Reindex(ReindexAction::FullReindex) => {
+                    if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() {
+                        let id = find_free_index()
+                            .ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
+                        tracing::debug!(embedder = name, id, "assigning free id to new embedder");
+                        self.index.embedder_category_id.put(self.wtxn, name, &id)?;
+                    }
+                }
+                EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ }
             }
         }
         let updated_configs: Vec<IndexEmbeddingConfig> = updated_configs
             .into_iter()
             .filter_map(|(name, (config, user_provided))| match config {
@@ -1285,11 +1277,7 @@ impl InnerIndexSettingsDiff {

         // if the user-defined searchables changed, then we need to reindex prompts.
         if cache_user_defined_searchables {
-            for (embedder_name, (config, _, _quantized)) in
-                new_settings.embedding_configs.inner_as_ref()
-            {
-                let was_quantized =
-                    old_settings.embedding_configs.get(embedder_name).map_or(false, |conf| conf.2);
+            for (embedder_name, (config, _)) in new_settings.embedding_configs.inner_as_ref() {
                 // skip embedders that don't use document templates
                 if !config.uses_document_template() {
                     continue;
@@ -1299,19 +1287,16 @@ impl InnerIndexSettingsDiff {
                 // this always makes the code clearer by explicitly handling the cases
                 match embedding_config_updates.entry(embedder_name.clone()) {
                     std::collections::btree_map::Entry::Vacant(entry) => {
-                        entry.insert(EmbedderAction::with_reindex(
-                            ReindexAction::RegeneratePrompts,
-                            was_quantized,
-                        ));
+                        entry.insert(EmbedderAction::Reindex(ReindexAction::RegeneratePrompts));
                     }
-                    std::collections::btree_map::Entry::Occupied(entry) => {
-                        let EmbedderAction {
-                            was_quantized: _,
-                            is_being_quantized: _,
-                            write_back: _, // We are deleting this embedder, so no point in regeneration
-                            reindex: _, // We are already fully reindexing
-                        } = entry.get();
+                    std::collections::btree_map::Entry::Occupied(entry) => match entry.get() {
+                        EmbedderAction::WriteBackToDocuments(_) => { /* we are deleting this embedder, so no point in regeneration */
                         }
+                        EmbedderAction::Reindex(ReindexAction::FullReindex) => { /* we are already fully reindexing */
+                        }
+                        EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { /* we are already regenerating prompts */
+                        }
+                    },
                 };
             }
         }
@@ -1561,7 +1546,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
|
|||||||
.map(
|
.map(
|
||||||
|IndexEmbeddingConfig {
|
|IndexEmbeddingConfig {
|
||||||
name,
|
name,
|
||||||
config: EmbeddingConfig { embedder_options, prompt, quantized },
|
config: EmbeddingConfig { embedder_options, prompt },
|
||||||
..
|
..
|
||||||
}| {
|
}| {
|
||||||
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
|
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
|
||||||
@@ -1571,7 +1556,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
|
|||||||
.map_err(crate::vector::Error::from)
|
.map_err(crate::vector::Error::from)
|
||||||
.map_err(crate::Error::from)?,
|
.map_err(crate::Error::from)?,
|
||||||
);
|
);
|
||||||
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
|
Ok((name, (embedder, prompt)))
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.collect();
|
.collect();
|
||||||
@@ -1596,7 +1581,6 @@ fn validate_prompt(
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
binary_quantized: binary_quantize,
|
|
||||||
}) => {
|
}) => {
|
||||||
let max_bytes = match document_template_max_bytes.set() {
|
let max_bytes = match document_template_max_bytes.set() {
|
||||||
Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
|
Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
|
||||||
@@ -1629,7 +1613,6 @@ fn validate_prompt(
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
binary_quantized: binary_quantize,
|
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
new => Ok(new),
|
new => Ok(new),
|
||||||
@@ -1655,7 +1638,6 @@ pub fn validate_embedding_settings(
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
binary_quantized: binary_quantize,
|
|
||||||
} = settings;
|
} = settings;
|
||||||
|
|
||||||
if let Some(0) = dimensions.set() {
|
if let Some(0) = dimensions.set() {
|
||||||
@@ -1696,7 +1678,6 @@ pub fn validate_embedding_settings(
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
binary_quantized: binary_quantize,
|
|
||||||
}));
|
}));
|
||||||
};
|
};
|
||||||
match inferred_source {
|
match inferred_source {
|
||||||
@@ -1798,7 +1779,6 @@ pub fn validate_embedding_settings(
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
binary_quantized: binary_quantize,
|
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ pub enum EmbedErrorKind {
|
|||||||
ManualEmbed(String),
|
ManualEmbed(String),
|
||||||
#[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually{}", option_info(.0.as_deref(), "server replied with "))]
|
#[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually{}", option_info(.0.as_deref(), "server replied with "))]
|
||||||
OllamaModelNotFoundError(Option<String>),
|
OllamaModelNotFoundError(Option<String>),
|
||||||
#[error("error deserializing the response body as JSON:\n - {0}")]
|
#[error("error deserialization the response body as JSON:\n - {0}")]
|
||||||
RestResponseDeserialization(std::io::Error),
|
RestResponseDeserialization(std::io::Error),
|
||||||
#[error("expected a response containing {0} embeddings, got only {1}")]
|
#[error("expected a response containing {0} embeddings, got only {1}")]
|
||||||
RestResponseEmbeddingCount(usize, usize),
|
RestResponseEmbeddingCount(usize, usize),
|
||||||
|
|||||||
@@ -1,13 +1,8 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Instant;
|
|
||||||
|
|
||||||
use arroy::distances::{BinaryQuantizedCosine, Cosine};
|
|
||||||
use arroy::ItemId;
|
|
||||||
use deserr::{DeserializeError, Deserr};
|
use deserr::{DeserializeError, Deserr};
|
||||||
use heed::{RoTxn, RwTxn, Unspecified};
|
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use self::error::{EmbedError, NewEmbedderError};
|
use self::error::{EmbedError, NewEmbedderError};
|
||||||
@@ -31,386 +26,6 @@ pub type Embedding = Vec<f32>;
|
|||||||
|
|
||||||
pub const REQUEST_PARALLELISM: usize = 40;
|
pub const REQUEST_PARALLELISM: usize = 40;
|
||||||
|
|
||||||
pub struct ArroyWrapper {
|
|
||||||
quantized: bool,
|
|
||||||
embedder_index: u8,
|
|
||||||
database: arroy::Database<Unspecified>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ArroyWrapper {
|
|
||||||
pub fn new(
|
|
||||||
database: arroy::Database<Unspecified>,
|
|
||||||
embedder_index: u8,
|
|
||||||
quantized: bool,
|
|
||||||
) -> Self {
|
|
||||||
Self { database, embedder_index, quantized }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn embedder_index(&self) -> u8 {
|
|
||||||
self.embedder_index
|
|
||||||
}
|
|
||||||
|
|
||||||
fn readers<'a, D: arroy::Distance>(
|
|
||||||
&'a self,
|
|
||||||
rtxn: &'a RoTxn<'a>,
|
|
||||||
db: arroy::Database<D>,
|
|
||||||
) -> impl Iterator<Item = Result<arroy::Reader<D>, arroy::Error>> + 'a {
|
|
||||||
arroy_db_range_for_embedder(self.embedder_index).map_while(move |index| {
|
|
||||||
match arroy::Reader::open(rtxn, index, db) {
|
|
||||||
Ok(reader) => match reader.is_empty(rtxn) {
|
|
||||||
Ok(false) => Some(Ok(reader)),
|
|
||||||
Ok(true) => None,
|
|
||||||
Err(e) => Some(Err(e)),
|
|
||||||
},
|
|
||||||
Err(arroy::Error::MissingMetadata(_)) => None,
|
|
||||||
Err(e) => Some(Err(e)),
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> {
|
|
||||||
let first_id = arroy_db_range_for_embedder(self.embedder_index).next().unwrap();
|
|
||||||
if self.quantized {
|
|
||||||
Ok(arroy::Reader::open(rtxn, first_id, self.quantized_db())?.dimensions())
|
|
||||||
} else {
|
|
||||||
Ok(arroy::Reader::open(rtxn, first_id, self.angular_db())?.dimensions())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>(
|
|
||||||
&mut self,
|
|
||||||
wtxn: &mut RwTxn,
|
|
||||||
rng: &mut R,
|
|
||||||
dimension: usize,
|
|
||||||
quantizing: bool,
|
|
||||||
cancel: &(impl Fn() -> bool + Sync + Send),
|
|
||||||
) -> Result<(), arroy::Error> {
|
|
||||||
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
|
||||||
if self.quantized {
|
|
||||||
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
|
||||||
if writer.need_build(wtxn)? {
|
|
||||||
writer.builder(rng).build(wtxn)?
|
|
||||||
} else if writer.is_empty(wtxn)? {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
|
||||||
// If we are quantizing the databases, we can't know from meilisearch
|
|
||||||
// if the db was empty but still contained the wrong metadata, thus we need
|
|
||||||
// to quantize everything and can't stop early. Since this operation can
|
|
||||||
// only happens once in the life of an embedder, it's not very performances
|
|
||||||
// sensitive.
|
|
||||||
if quantizing && !self.quantized {
|
|
||||||
let writer = writer.prepare_changing_distance::<BinaryQuantizedCosine>(wtxn)?;
|
|
||||||
writer.builder(rng).cancel(cancel).build(wtxn)?;
|
|
||||||
} else if writer.need_build(wtxn)? {
|
|
||||||
writer.builder(rng).cancel(cancel).build(wtxn)?;
|
|
||||||
} else if writer.is_empty(wtxn)? {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Overwrite all the embeddings associated with the index and item ID.
|
|
||||||
/// /!\ It won't remove embeddings after the last passed embedding, which can leave stale embeddings.
|
|
||||||
/// You should call `del_items` on the `item_id` before calling this method.
|
|
||||||
/// /!\ Cannot insert more than u8::MAX embeddings; after inserting u8::MAX embeddings, all the remaining ones will be silently ignored.
|
|
||||||
pub fn add_items(
|
|
||||||
&self,
|
|
||||||
wtxn: &mut RwTxn,
|
|
||||||
item_id: arroy::ItemId,
|
|
||||||
embeddings: &Embeddings<f32>,
|
|
||||||
) -> Result<(), arroy::Error> {
|
|
||||||
let dimension = embeddings.dimension();
|
|
||||||
for (index, vector) in
|
|
||||||
arroy_db_range_for_embedder(self.embedder_index).zip(embeddings.iter())
|
|
||||||
{
|
|
||||||
if self.quantized {
|
|
||||||
arroy::Writer::new(self.quantized_db(), index, dimension)
|
|
||||||
.add_item(wtxn, item_id, vector)?
|
|
||||||
} else {
|
|
||||||
arroy::Writer::new(self.angular_db(), index, dimension)
|
|
||||||
.add_item(wtxn, item_id, vector)?
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Add one document int for this index where we can find an empty spot.
|
|
||||||
pub fn add_item(
|
|
||||||
&self,
|
|
||||||
wtxn: &mut RwTxn,
|
|
||||||
item_id: arroy::ItemId,
|
|
||||||
vector: &[f32],
|
|
||||||
) -> Result<(), arroy::Error> {
|
|
||||||
if self.quantized {
|
|
||||||
self._add_item(wtxn, self.quantized_db(), item_id, vector)
|
|
||||||
} else {
|
|
||||||
self._add_item(wtxn, self.angular_db(), item_id, vector)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn _add_item<D: arroy::Distance>(
|
|
||||||
&self,
|
|
||||||
wtxn: &mut RwTxn,
|
|
||||||
db: arroy::Database<D>,
|
|
||||||
item_id: arroy::ItemId,
|
|
||||||
vector: &[f32],
|
|
||||||
) -> Result<(), arroy::Error> {
|
|
||||||
let dimension = vector.len();
|
|
||||||
|
|
||||||
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
|
||||||
let writer = arroy::Writer::new(db, index, dimension);
|
|
||||||
if !writer.contains_item(wtxn, item_id)? {
|
|
||||||
writer.add_item(wtxn, item_id, vector)?;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Delete all embeddings from a specific `item_id`
|
|
||||||
pub fn del_items(
|
|
||||||
&self,
|
|
||||||
wtxn: &mut RwTxn,
|
|
||||||
dimension: usize,
|
|
||||||
item_id: arroy::ItemId,
|
|
||||||
) -> Result<(), arroy::Error> {
|
|
||||||
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
|
||||||
if self.quantized {
|
|
||||||
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
|
||||||
if !writer.del_item(wtxn, item_id)? {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
|
||||||
if !writer.del_item(wtxn, item_id)? {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Delete one item.
|
|
||||||
pub fn del_item(
|
|
||||||
&self,
|
|
||||||
wtxn: &mut RwTxn,
|
|
||||||
item_id: arroy::ItemId,
|
|
||||||
vector: &[f32],
|
|
||||||
) -> Result<bool, arroy::Error> {
|
|
||||||
if self.quantized {
|
|
||||||
self._del_item(wtxn, self.quantized_db(), item_id, vector)
|
|
||||||
} else {
|
|
||||||
self._del_item(wtxn, self.angular_db(), item_id, vector)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn _del_item<D: arroy::Distance>(
|
|
||||||
&self,
|
|
||||||
wtxn: &mut RwTxn,
|
|
||||||
db: arroy::Database<D>,
|
|
||||||
item_id: arroy::ItemId,
|
|
||||||
vector: &[f32],
|
|
||||||
) -> Result<bool, arroy::Error> {
|
|
||||||
let dimension = vector.len();
|
|
||||||
let mut deleted_index = None;
|
|
||||||
|
|
||||||
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
|
||||||
let writer = arroy::Writer::new(db, index, dimension);
|
|
||||||
let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
|
|
||||||
// uses invariant: vectors are packed in the first writers.
|
|
||||||
break;
|
|
||||||
};
|
|
||||||
if candidate == vector {
|
|
||||||
writer.del_item(wtxn, item_id)?;
|
|
||||||
deleted_index = Some(index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 🥲 enforce invariant: vectors are packed in the first writers.
|
|
||||||
if let Some(deleted_index) = deleted_index {
|
|
||||||
let mut last_index_with_a_vector = None;
|
|
||||||
for index in
|
|
||||||
arroy_db_range_for_embedder(self.embedder_index).skip(deleted_index as usize)
|
|
||||||
{
|
|
||||||
let writer = arroy::Writer::new(db, index, dimension);
|
|
||||||
let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
|
|
||||||
break;
|
|
||||||
};
|
|
||||||
last_index_with_a_vector = Some((index, candidate));
|
|
||||||
}
|
|
||||||
if let Some((last_index, vector)) = last_index_with_a_vector {
|
|
||||||
let writer = arroy::Writer::new(db, last_index, dimension);
|
|
||||||
writer.del_item(wtxn, item_id)?;
|
|
||||||
let writer = arroy::Writer::new(db, deleted_index, dimension);
|
|
||||||
writer.add_item(wtxn, item_id, &vector)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(deleted_index.is_some())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
|
|
||||||
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
|
||||||
if self.quantized {
|
|
||||||
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
|
||||||
if writer.is_empty(wtxn)? {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
writer.clear(wtxn)?;
|
|
||||||
} else {
|
|
||||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
|
||||||
if writer.is_empty(wtxn)? {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
writer.clear(wtxn)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn contains_item(
|
|
||||||
&self,
|
|
||||||
rtxn: &RoTxn,
|
|
||||||
dimension: usize,
|
|
||||||
item: arroy::ItemId,
|
|
||||||
) -> Result<bool, arroy::Error> {
|
|
||||||
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
|
||||||
let contains = if self.quantized {
|
|
||||||
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
|
||||||
if writer.is_empty(rtxn)? {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
writer.contains_item(rtxn, item)?
|
|
||||||
} else {
|
|
||||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
|
||||||
if writer.is_empty(rtxn)? {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
writer.contains_item(rtxn, item)?
|
|
||||||
};
|
|
||||||
if contains {
|
|
||||||
return Ok(contains);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(false)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn nns_by_item(
|
|
||||||
&self,
|
|
||||||
rtxn: &RoTxn,
|
|
||||||
item: ItemId,
|
|
||||||
limit: usize,
|
|
||||||
filter: Option<&RoaringBitmap>,
|
|
||||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
|
||||||
if self.quantized {
|
|
||||||
self._nns_by_item(rtxn, self.quantized_db(), item, limit, filter)
|
|
||||||
} else {
|
|
||||||
self._nns_by_item(rtxn, self.angular_db(), item, limit, filter)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn _nns_by_item<D: arroy::Distance>(
|
|
||||||
&self,
|
|
||||||
rtxn: &RoTxn,
|
|
||||||
db: arroy::Database<D>,
|
|
||||||
item: ItemId,
|
|
||||||
limit: usize,
|
|
||||||
filter: Option<&RoaringBitmap>,
|
|
||||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
|
||||||
let mut results = Vec::new();
|
|
||||||
|
|
||||||
for reader in self.readers(rtxn, db) {
|
|
||||||
let reader = reader?;
|
|
||||||
let mut searcher = reader.nns(limit);
|
|
||||||
if let Some(filter) = filter {
|
|
||||||
searcher.candidates(filter);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(mut ret) = searcher.by_item(rtxn, item)? {
|
|
||||||
results.append(&mut ret);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
|
||||||
Ok(results)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn nns_by_vector(
|
|
||||||
&self,
|
|
||||||
rtxn: &RoTxn,
|
|
||||||
vector: &[f32],
|
|
||||||
limit: usize,
|
|
||||||
filter: Option<&RoaringBitmap>,
|
|
||||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
|
||||||
if self.quantized {
|
|
||||||
self._nns_by_vector(rtxn, self.quantized_db(), vector, limit, filter)
|
|
||||||
} else {
|
|
||||||
self._nns_by_vector(rtxn, self.angular_db(), vector, limit, filter)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn _nns_by_vector<D: arroy::Distance>(
|
|
||||||
&self,
|
|
||||||
rtxn: &RoTxn,
|
|
||||||
db: arroy::Database<D>,
|
|
||||||
vector: &[f32],
|
|
||||||
limit: usize,
|
|
||||||
filter: Option<&RoaringBitmap>,
|
|
||||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
|
||||||
let mut results = Vec::new();
|
|
||||||
|
|
||||||
for reader in self.readers(rtxn, db) {
|
|
||||||
let reader = reader?;
|
|
||||||
let mut searcher = reader.nns(limit);
|
|
||||||
if let Some(filter) = filter {
|
|
||||||
searcher.candidates(filter);
|
|
||||||
}
|
|
||||||
|
|
||||||
results.append(&mut searcher.by_vector(rtxn, vector)?);
|
|
||||||
}
|
|
||||||
|
|
||||||
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
|
||||||
|
|
||||||
Ok(results)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn item_vectors(&self, rtxn: &RoTxn, item_id: u32) -> Result<Vec<Vec<f32>>, arroy::Error> {
|
|
||||||
let mut vectors = Vec::new();
|
|
||||||
|
|
||||||
if self.quantized {
|
|
||||||
for reader in self.readers(rtxn, self.quantized_db()) {
|
|
||||||
if let Some(vec) = reader?.item_vector(rtxn, item_id)? {
|
|
||||||
vectors.push(vec);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for reader in self.readers(rtxn, self.angular_db()) {
|
|
||||||
if let Some(vec) = reader?.item_vector(rtxn, item_id)? {
|
|
||||||
vectors.push(vec);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(vectors)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn angular_db(&self) -> arroy::Database<Cosine> {
|
|
||||||
self.database.remap_data_type()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> {
|
|
||||||
self.database.remap_data_type()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// One or multiple embeddings stored consecutively in a flat vector.
|
/// One or multiple embeddings stored consecutively in a flat vector.
|
||||||
pub struct Embeddings<F> {
|
pub struct Embeddings<F> {
|
||||||
data: Vec<F>,
|
data: Vec<F>,
|
||||||
@@ -509,48 +124,62 @@ pub struct EmbeddingConfig {
|
|||||||
pub embedder_options: EmbedderOptions,
|
pub embedder_options: EmbedderOptions,
|
||||||
/// Document template
|
/// Document template
|
||||||
pub prompt: PromptData,
|
pub prompt: PromptData,
|
||||||
/// If this embedder is binary quantized
|
|
||||||
pub quantized: Option<bool>,
|
|
||||||
// TODO: add metrics and anything needed
|
// TODO: add metrics and anything needed
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EmbeddingConfig {
|
|
||||||
pub fn quantized(&self) -> bool {
|
|
||||||
self.quantized.unwrap_or_default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Map of embedder configurations.
|
/// Map of embedder configurations.
|
||||||
///
|
///
|
||||||
/// Each configuration is mapped to a name.
|
/// Each configuration is mapped to a name.
|
||||||
#[derive(Clone, Default)]
|
#[derive(Clone, Default)]
|
||||||
pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>);
|
pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>)>);
|
||||||
|
|
||||||
impl EmbeddingConfigs {
|
impl EmbeddingConfigs {
|
||||||
/// Create the map from its internal component.s
|
/// Create the map from its internal component.s
|
||||||
pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>) -> Self {
|
pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>)>) -> Self {
|
||||||
Self(data)
|
Self(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get an embedder configuration and template from its name.
|
/// Get an embedder configuration and template from its name.
|
||||||
pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>, bool)> {
|
pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
|
||||||
self.0.get(name).cloned()
|
self.0.get(name).cloned()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
|
/// Get the default embedder configuration, if any.
|
||||||
|
pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
|
||||||
|
self.get(self.get_default_embedder_name())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
|
||||||
&self.0
|
&self.0
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
|
pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
|
||||||
self.0
|
self.0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the name of the default embedder configuration.
|
||||||
|
///
|
||||||
|
/// The default embedder is determined as follows:
|
||||||
|
///
|
||||||
|
/// - If there is only one embedder, it is always the default.
|
||||||
|
/// - If there are multiple embedders and one of them is called `default`, then that one is the default embedder.
|
||||||
|
/// - In all other cases, there is no default embedder.
|
||||||
|
pub fn get_default_embedder_name(&self) -> &str {
|
||||||
|
let mut it = self.0.keys();
|
||||||
|
let first_name = it.next();
|
||||||
|
let second_name = it.next();
|
||||||
|
match (first_name, second_name) {
|
||||||
|
(None, _) => "default",
|
||||||
|
(Some(first), None) => first,
|
||||||
|
(Some(_), Some(_)) => "default",
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IntoIterator for EmbeddingConfigs {
|
impl IntoIterator for EmbeddingConfigs {
|
||||||
type Item = (String, (Arc<Embedder>, Arc<Prompt>, bool));
|
type Item = (String, (Arc<Embedder>, Arc<Prompt>));
|
||||||
|
|
||||||
type IntoIter =
|
type IntoIter = std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>)>;
|
||||||
std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>, bool)>;
|
|
||||||
|
|
||||||
fn into_iter(self) -> Self::IntoIter {
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
self.0.into_iter()
|
self.0.into_iter()
|
||||||
@@ -595,23 +224,18 @@ impl Embedder {
|
|||||||
pub fn embed(
|
pub fn embed(
|
||||||
&self,
|
&self,
|
||||||
texts: Vec<String>,
|
texts: Vec<String>,
|
||||||
deadline: Option<Instant>,
|
|
||||||
) -> std::result::Result<Vec<Embeddings<f32>>, EmbedError> {
|
) -> std::result::Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
match self {
|
match self {
|
||||||
Embedder::HuggingFace(embedder) => embedder.embed(texts),
|
Embedder::HuggingFace(embedder) => embedder.embed(texts),
|
||||||
Embedder::OpenAi(embedder) => embedder.embed(texts, deadline),
|
Embedder::OpenAi(embedder) => embedder.embed(texts),
|
||||||
Embedder::Ollama(embedder) => embedder.embed(texts, deadline),
|
Embedder::Ollama(embedder) => embedder.embed(texts),
|
||||||
Embedder::UserProvided(embedder) => embedder.embed(texts),
|
Embedder::UserProvided(embedder) => embedder.embed(texts),
|
||||||
Embedder::Rest(embedder) => embedder.embed(texts, deadline),
|
Embedder::Rest(embedder) => embedder.embed(texts),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embed_one(
|
pub fn embed_one(&self, text: String) -> std::result::Result<Embedding, EmbedError> {
|
||||||
&self,
|
let mut embeddings = self.embed(vec![text])?;
|
||||||
text: String,
|
|
||||||
deadline: Option<Instant>,
|
|
||||||
) -> std::result::Result<Embedding, EmbedError> {
|
|
||||||
let mut embeddings = self.embed(vec![text], deadline)?;
|
|
||||||
let embeddings = embeddings.pop().ok_or_else(EmbedError::missing_embedding)?;
|
let embeddings = embeddings.pop().ok_or_else(EmbedError::missing_embedding)?;
|
||||||
Ok(if embeddings.iter().nth(1).is_some() {
|
Ok(if embeddings.iter().nth(1).is_some() {
|
||||||
tracing::warn!("Ignoring embeddings past the first one in long search query");
|
tracing::warn!("Ignoring embeddings past the first one in long search query");
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
use std::time::Instant;
|
|
||||||
|
|
||||||
use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
|
use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
|
||||||
|
|
||||||
use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind};
|
use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind};
|
||||||
@@ -77,12 +75,8 @@ impl Embedder {
|
|||||||
Ok(Self { rest_embedder })
|
Ok(Self { rest_embedder })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embed(
|
pub fn embed(&self, texts: Vec<String>) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
&self,
|
match self.rest_embedder.embed(texts) {
|
||||||
texts: Vec<String>,
|
|
||||||
deadline: Option<Instant>,
|
|
||||||
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
|
||||||
match self.rest_embedder.embed(texts, deadline) {
|
|
||||||
Ok(embeddings) => Ok(embeddings),
|
Ok(embeddings) => Ok(embeddings),
|
||||||
Err(EmbedError { kind: EmbedErrorKind::RestOtherStatusCode(404, error), fault: _ }) => {
|
Err(EmbedError { kind: EmbedErrorKind::RestOtherStatusCode(404, error), fault: _ }) => {
|
||||||
Err(EmbedError::ollama_model_not_found(error))
|
Err(EmbedError::ollama_model_not_found(error))
|
||||||
@@ -98,7 +92,7 @@ impl Embedder {
|
|||||||
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
|
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
|
||||||
threads
|
threads
|
||||||
.install(move || {
|
.install(move || {
|
||||||
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
|
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
|
||||||
})
|
})
|
||||||
.map_err(|error| EmbedError {
|
.map_err(|error| EmbedError {
|
||||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
use std::time::Instant;
|
|
||||||
|
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
|
use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
|
||||||
|
|
||||||
@@ -208,40 +206,32 @@ impl Embedder {
|
|||||||
Ok(Self { options, rest_embedder, tokenizer })
|
Ok(Self { options, rest_embedder, tokenizer })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embed(
|
pub fn embed(&self, texts: Vec<String>) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
&self,
|
match self.rest_embedder.embed_ref(&texts) {
|
||||||
texts: Vec<String>,
|
|
||||||
deadline: Option<Instant>,
|
|
||||||
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
|
||||||
match self.rest_embedder.embed_ref(&texts, deadline) {
|
|
||||||
Ok(embeddings) => Ok(embeddings),
|
Ok(embeddings) => Ok(embeddings),
|
||||||
Err(EmbedError { kind: EmbedErrorKind::RestBadRequest(error, _), fault: _ }) => {
|
Err(EmbedError { kind: EmbedErrorKind::RestBadRequest(error, _), fault: _ }) => {
|
||||||
tracing::warn!(error=?error, "OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your document template.");
|
tracing::warn!(error=?error, "OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your document template.");
|
||||||
self.try_embed_tokenized(&texts, deadline)
|
self.try_embed_tokenized(&texts)
|
||||||
}
|
}
|
||||||
Err(error) => Err(error),
|
Err(error) => Err(error),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn try_embed_tokenized(
|
fn try_embed_tokenized(&self, text: &[String]) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
&self,
|
|
||||||
text: &[String],
|
|
||||||
deadline: Option<Instant>,
|
|
||||||
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
|
||||||
let mut all_embeddings = Vec::with_capacity(text.len());
|
let mut all_embeddings = Vec::with_capacity(text.len());
|
||||||
for text in text {
|
for text in text {
|
||||||
let max_token_count = self.options.embedding_model.max_token();
|
let max_token_count = self.options.embedding_model.max_token();
|
||||||
let encoded = self.tokenizer.encode_ordinary(text.as_str());
|
let encoded = self.tokenizer.encode_ordinary(text.as_str());
|
||||||
let len = encoded.len();
|
let len = encoded.len();
|
||||||
if len < max_token_count {
|
if len < max_token_count {
|
||||||
all_embeddings.append(&mut self.rest_embedder.embed_ref(&[text], deadline)?);
|
all_embeddings.append(&mut self.rest_embedder.embed_ref(&[text])?);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let tokens = &encoded.as_slice()[0..max_token_count];
|
let tokens = &encoded.as_slice()[0..max_token_count];
|
||||||
let mut embeddings_for_prompt = Embeddings::new(self.dimensions());
|
let mut embeddings_for_prompt = Embeddings::new(self.dimensions());
|
||||||
|
|
||||||
let embedding = self.rest_embedder.embed_tokens(tokens, deadline)?;
|
let embedding = self.rest_embedder.embed_tokens(tokens)?;
|
||||||
embeddings_for_prompt.append(embedding.into_inner()).map_err(|got| {
|
embeddings_for_prompt.append(embedding.into_inner()).map_err(|got| {
|
||||||
EmbedError::rest_unexpected_dimension(self.dimensions(), got.len())
|
EmbedError::rest_unexpected_dimension(self.dimensions(), got.len())
|
||||||
})?;
|
})?;
|
||||||
@@ -258,7 +248,7 @@ impl Embedder {
|
|||||||
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
|
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
|
||||||
threads
|
threads
|
||||||
.install(move || {
|
.install(move || {
|
||||||
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
|
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
|
||||||
})
|
})
|
||||||
.map_err(|error| EmbedError {
|
.map_err(|error| EmbedError {
|
||||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::time::Instant;
|
|
||||||
|
|
||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
@@ -131,7 +130,6 @@ impl Embedder {
|
|||||||
let client = ureq::AgentBuilder::new()
|
let client = ureq::AgentBuilder::new()
|
||||||
.max_idle_connections(REQUEST_PARALLELISM * 2)
|
.max_idle_connections(REQUEST_PARALLELISM * 2)
|
||||||
.max_idle_connections_per_host(REQUEST_PARALLELISM * 2)
|
.max_idle_connections_per_host(REQUEST_PARALLELISM * 2)
|
||||||
.timeout(std::time::Duration::from_secs(30))
|
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
let request = Request::new(options.request)?;
|
let request = Request::new(options.request)?;
|
||||||
@@ -156,31 +154,19 @@ impl Embedder {
|
|||||||
Ok(Self { data, dimensions, distribution: options.distribution })
|
Ok(Self { data, dimensions, distribution: options.distribution })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embed(
|
pub fn embed(&self, texts: Vec<String>) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
&self,
|
embed(&self.data, texts.as_slice(), texts.len(), Some(self.dimensions))
|
||||||
texts: Vec<String>,
|
|
||||||
deadline: Option<Instant>,
|
|
||||||
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
|
||||||
embed(&self.data, texts.as_slice(), texts.len(), Some(self.dimensions), deadline)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embed_ref<S>(
|
pub fn embed_ref<S>(&self, texts: &[S]) -> Result<Vec<Embeddings<f32>>, EmbedError>
|
||||||
&self,
|
|
||||||
texts: &[S],
|
|
||||||
deadline: Option<Instant>,
|
|
||||||
) -> Result<Vec<Embeddings<f32>>, EmbedError>
|
|
||||||
where
|
where
|
||||||
S: AsRef<str> + Serialize,
|
S: AsRef<str> + Serialize,
|
||||||
{
|
{
|
||||||
embed(&self.data, texts, texts.len(), Some(self.dimensions), deadline)
|
embed(&self.data, texts, texts.len(), Some(self.dimensions))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embed_tokens(
|
pub fn embed_tokens(&self, tokens: &[usize]) -> Result<Embeddings<f32>, EmbedError> {
|
||||||
&self,
|
let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions))?;
|
||||||
tokens: &[usize],
|
|
||||||
deadline: Option<Instant>,
|
|
||||||
) -> Result<Embeddings<f32>, EmbedError> {
|
|
||||||
let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline)?;
|
|
||||||
// unwrap: guaranteed that embeddings.len() == 1, otherwise the previous line terminated in error
|
// unwrap: guaranteed that embeddings.len() == 1, otherwise the previous line terminated in error
|
||||||
Ok(embeddings.pop().unwrap())
|
Ok(embeddings.pop().unwrap())
|
||||||
}
|
}
|
||||||
@@ -192,7 +178,7 @@ impl Embedder {
|
|||||||
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
|
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
|
||||||
threads
|
threads
|
||||||
.install(move || {
|
.install(move || {
|
||||||
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
|
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
|
||||||
})
|
})
|
||||||
.map_err(|error| EmbedError {
|
.map_err(|error| EmbedError {
|
||||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||||
@@ -221,7 +207,7 @@ impl Embedder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn infer_dimensions(data: &EmbedderData) -> Result<usize, NewEmbedderError> {
|
fn infer_dimensions(data: &EmbedderData) -> Result<usize, NewEmbedderError> {
|
||||||
let v = embed(data, ["test"].as_slice(), 1, None, None)
|
let v = embed(data, ["test"].as_slice(), 1, None)
|
||||||
.map_err(NewEmbedderError::could_not_determine_dimension)?;
|
.map_err(NewEmbedderError::could_not_determine_dimension)?;
|
||||||
// unwrap: guaranteed that v.len() == 1, otherwise the previous line terminated in error
|
// unwrap: guaranteed that v.len() == 1, otherwise the previous line terminated in error
|
||||||
Ok(v.first().unwrap().dimension())
|
Ok(v.first().unwrap().dimension())
|
||||||
@@ -232,7 +218,6 @@ fn embed<S>(
|
|||||||
inputs: &[S],
|
inputs: &[S],
|
||||||
expected_count: usize,
|
expected_count: usize,
|
||||||
expected_dimension: Option<usize>,
|
expected_dimension: Option<usize>,
|
||||||
deadline: Option<Instant>,
|
|
||||||
) -> Result<Vec<Embeddings<f32>>, EmbedError>
|
) -> Result<Vec<Embeddings<f32>>, EmbedError>
|
||||||
where
|
where
|
||||||
S: Serialize,
|
S: Serialize,
|
||||||
@@ -252,27 +237,16 @@ where
|
|||||||
|
|
||||||
for attempt in 0..10 {
|
for attempt in 0..10 {
|
||||||
let response = request.clone().send_json(&body);
|
let response = request.clone().send_json(&body);
|
||||||
let result = check_response(response, data.configuration_source).and_then(|response| {
|
let result = check_response(response, data.configuration_source);
|
||||||
response_to_embedding(response, data, expected_count, expected_dimension)
|
|
||||||
});
|
|
||||||
|
|
||||||
let retry_duration = match result {
|
let retry_duration = match result {
|
||||||
Ok(response) => return Ok(response),
|
Ok(response) => {
|
||||||
|
return response_to_embedding(response, data, expected_count, expected_dimension)
|
||||||
|
}
|
||||||
Err(retry) => {
|
Err(retry) => {
|
||||||
tracing::warn!("Failed: {}", retry.error);
|
tracing::warn!("Failed: {}", retry.error);
|
||||||
if let Some(deadline) = deadline {
|
|
||||||
let now = std::time::Instant::now();
|
|
||||||
if now > deadline {
|
|
||||||
tracing::warn!("Could not embed due to deadline");
|
|
||||||
return Err(retry.into_error());
|
|
||||||
}
|
|
||||||
|
|
||||||
let duration_to_deadline = deadline - now;
|
|
||||||
retry.into_duration(attempt).map(|duration| duration.min(duration_to_deadline))
|
|
||||||
} else {
|
|
||||||
retry.into_duration(attempt)
|
retry.into_duration(attempt)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}?;
|
}?;
|
||||||
|
|
||||||
let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
|
let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
|
||||||
@@ -289,7 +263,6 @@ where
|
|||||||
let result = check_response(response, data.configuration_source);
|
let result = check_response(response, data.configuration_source);
|
||||||
result.map_err(Retry::into_error).and_then(|response| {
|
result.map_err(Retry::into_error).and_then(|response| {
|
||||||
response_to_embedding(response, data, expected_count, expected_dimension)
|
response_to_embedding(response, data, expected_count, expected_dimension)
|
||||||
.map_err(Retry::into_error)
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -331,28 +304,23 @@ fn response_to_embedding(
|
|||||||
data: &EmbedderData,
|
data: &EmbedderData,
|
||||||
expected_count: usize,
|
expected_count: usize,
|
||||||
expected_dimensions: Option<usize>,
|
expected_dimensions: Option<usize>,
|
||||||
) -> Result<Vec<Embeddings<f32>>, Retry> {
|
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
let response: serde_json::Value = response
|
let response: serde_json::Value =
|
||||||
.into_json()
|
response.into_json().map_err(EmbedError::rest_response_deserialization)?;
|
||||||
.map_err(EmbedError::rest_response_deserialization)
|
|
||||||
.map_err(Retry::retry_later)?;
|
|
||||||
|
|
||||||
let embeddings = data.response.extract_embeddings(response).map_err(Retry::give_up)?;
|
let embeddings = data.response.extract_embeddings(response)?;
|
||||||
|
|
||||||
if embeddings.len() != expected_count {
|
if embeddings.len() != expected_count {
|
||||||
return Err(Retry::give_up(EmbedError::rest_response_embedding_count(
|
return Err(EmbedError::rest_response_embedding_count(expected_count, embeddings.len()));
|
||||||
expected_count,
|
|
||||||
embeddings.len(),
|
|
||||||
)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(dimensions) = expected_dimensions {
|
if let Some(dimensions) = expected_dimensions {
|
||||||
for embedding in &embeddings {
|
for embedding in &embeddings {
|
||||||
if embedding.dimension() != dimensions {
|
if embedding.dimension() != dimensions {
|
||||||
return Err(Retry::give_up(EmbedError::rest_unexpected_dimension(
|
return Err(EmbedError::rest_unexpected_dimension(
|
||||||
dimensions,
|
dimensions,
|
||||||
embedding.dimension(),
|
embedding.dimension(),
|
||||||
)));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,9 +32,6 @@ pub struct EmbeddingSettings {
|
|||||||
pub dimensions: Setting<usize>,
|
pub dimensions: Setting<usize>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
pub binary_quantized: Setting<bool>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[deserr(default)]
|
|
||||||
pub document_template: Setting<String>,
|
pub document_template: Setting<String>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
@@ -88,63 +85,23 @@ pub enum ReindexAction {
|
|||||||
|
|
||||||
pub enum SettingsDiff {
|
pub enum SettingsDiff {
|
||||||
Remove,
|
Remove,
|
||||||
Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool },
|
Reindex { action: ReindexAction, updated_settings: EmbeddingSettings },
|
||||||
UpdateWithoutReindex { updated_settings: EmbeddingSettings, quantize: bool },
|
UpdateWithoutReindex { updated_settings: EmbeddingSettings },
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Debug)]
|
pub enum EmbedderAction {
|
||||||
pub struct EmbedderAction {
|
WriteBackToDocuments(WriteBackToDocuments),
|
||||||
pub was_quantized: bool,
|
Reindex(ReindexAction),
|
||||||
pub is_being_quantized: bool,
|
|
||||||
pub write_back: Option<WriteBackToDocuments>,
|
|
||||||
pub reindex: Option<ReindexAction>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EmbedderAction {
|
|
||||||
pub fn is_being_quantized(&self) -> bool {
|
|
||||||
self.is_being_quantized
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn write_back(&self) -> Option<&WriteBackToDocuments> {
|
|
||||||
self.write_back.as_ref()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn reindex(&self) -> Option<&ReindexAction> {
|
|
||||||
self.reindex.as_ref()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_is_being_quantized(mut self, quantize: bool) -> Self {
|
|
||||||
self.is_being_quantized = quantize;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_write_back(write_back: WriteBackToDocuments, was_quantized: bool) -> Self {
|
|
||||||
Self {
|
|
||||||
was_quantized,
|
|
||||||
is_being_quantized: false,
|
|
||||||
write_back: Some(write_back),
|
|
||||||
reindex: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self {
|
|
||||||
Self { was_quantized, is_being_quantized: false, write_back: None, reindex: Some(reindex) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct WriteBackToDocuments {
|
pub struct WriteBackToDocuments {
|
||||||
pub embedder_id: u8,
|
pub embedder_id: u8,
|
||||||
pub user_provided: RoaringBitmap,
|
pub user_provided: RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SettingsDiff {
|
impl SettingsDiff {
|
||||||
pub fn from_settings(
|
pub fn from_settings(old: EmbeddingSettings, new: Setting<EmbeddingSettings>) -> Self {
|
||||||
embedder_name: &str,
|
match new {
|
||||||
old: EmbeddingSettings,
|
|
||||||
new: Setting<EmbeddingSettings>,
|
|
||||||
) -> Result<Self, UserError> {
|
|
||||||
let ret = match new {
|
|
||||||
Setting::Set(new) => {
|
Setting::Set(new) => {
|
||||||
let EmbeddingSettings {
|
let EmbeddingSettings {
|
||||||
mut source,
|
mut source,
|
||||||
@@ -159,7 +116,6 @@ impl SettingsDiff {
|
|||||||
mut distribution,
|
mut distribution,
|
||||||
mut headers,
|
mut headers,
|
||||||
mut document_template_max_bytes,
|
mut document_template_max_bytes,
|
||||||
binary_quantized: mut binary_quantize,
|
|
||||||
} = old;
|
} = old;
|
||||||
|
|
||||||
let EmbeddingSettings {
|
let EmbeddingSettings {
|
||||||
@@ -175,17 +131,8 @@ impl SettingsDiff {
|
|||||||
distribution: new_distribution,
|
distribution: new_distribution,
|
||||||
headers: new_headers,
|
headers: new_headers,
|
||||||
document_template_max_bytes: new_document_template_max_bytes,
|
document_template_max_bytes: new_document_template_max_bytes,
|
||||||
binary_quantized: new_binary_quantize,
|
|
||||||
} = new;
|
} = new;
|
||||||
|
|
||||||
if matches!(binary_quantize, Setting::Set(true))
|
|
||||||
&& matches!(new_binary_quantize, Setting::Set(false))
|
|
||||||
{
|
|
||||||
return Err(UserError::InvalidDisableBinaryQuantization {
|
|
||||||
embedder_name: embedder_name.to_string(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut reindex_action = None;
|
let mut reindex_action = None;
|
||||||
|
|
||||||
// **Warning**: do not use short-circuiting || here, we want all these operations applied
|
// **Warning**: do not use short-circuiting || here, we want all these operations applied
|
||||||
@@ -225,7 +172,6 @@ impl SettingsDiff {
|
|||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let binary_quantize_changed = binary_quantize.apply(new_binary_quantize);
|
|
||||||
if url.apply(new_url) {
|
if url.apply(new_url) {
|
||||||
match source {
|
match source {
|
||||||
// do not regenerate on an url change in OpenAI
|
// do not regenerate on an url change in OpenAI
|
||||||
@@ -285,27 +231,16 @@ impl SettingsDiff {
|
|||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
document_template_max_bytes,
|
document_template_max_bytes,
|
||||||
binary_quantized: binary_quantize,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
match reindex_action {
|
match reindex_action {
|
||||||
Some(action) => Self::Reindex {
|
Some(action) => Self::Reindex { action, updated_settings },
|
||||||
action,
|
None => Self::UpdateWithoutReindex { updated_settings },
|
||||||
updated_settings,
|
|
||||||
quantize: binary_quantize_changed,
|
|
||||||
},
|
|
||||||
None => Self::UpdateWithoutReindex {
|
|
||||||
updated_settings,
|
|
||||||
quantize: binary_quantize_changed,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Setting::Reset => Self::Remove,
|
Setting::Reset => Self::Remove,
|
||||||
Setting::NotSet => {
|
Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old },
|
||||||
Self::UpdateWithoutReindex { updated_settings: old, quantize: false }
|
|
||||||
}
|
}
|
||||||
};
|
|
||||||
Ok(ret)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -417,8 +352,6 @@ impl EmbeddingSettings {
|
|||||||
|
|
||||||
pub const DISTRIBUTION: &'static str = "distribution";
|
pub const DISTRIBUTION: &'static str = "distribution";
|
||||||
|
|
||||||
pub const BINARY_QUANTIZED: &'static str = "binaryQuantized";
|
|
||||||
|
|
||||||
pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] {
|
pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] {
|
||||||
match field {
|
match field {
|
||||||
Self::SOURCE => &[
|
Self::SOURCE => &[
|
||||||
@@ -458,13 +391,6 @@ impl EmbeddingSettings {
|
|||||||
EmbedderSource::Rest,
|
EmbedderSource::Rest,
|
||||||
EmbedderSource::UserProvided,
|
EmbedderSource::UserProvided,
|
||||||
],
|
],
|
||||||
Self::BINARY_QUANTIZED => &[
|
|
||||||
EmbedderSource::HuggingFace,
|
|
||||||
EmbedderSource::Ollama,
|
|
||||||
EmbedderSource::OpenAi,
|
|
||||||
EmbedderSource::Rest,
|
|
||||||
EmbedderSource::UserProvided,
|
|
||||||
],
|
|
||||||
_other => unreachable!("unknown field"),
|
_other => unreachable!("unknown field"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -479,7 +405,6 @@ impl EmbeddingSettings {
|
|||||||
Self::DIMENSIONS,
|
Self::DIMENSIONS,
|
||||||
Self::DISTRIBUTION,
|
Self::DISTRIBUTION,
|
||||||
Self::URL,
|
Self::URL,
|
||||||
Self::BINARY_QUANTIZED,
|
|
||||||
],
|
],
|
||||||
EmbedderSource::HuggingFace => &[
|
EmbedderSource::HuggingFace => &[
|
||||||
Self::SOURCE,
|
Self::SOURCE,
|
||||||
@@ -487,7 +412,6 @@ impl EmbeddingSettings {
|
|||||||
Self::REVISION,
|
Self::REVISION,
|
||||||
Self::DOCUMENT_TEMPLATE,
|
Self::DOCUMENT_TEMPLATE,
|
||||||
Self::DISTRIBUTION,
|
Self::DISTRIBUTION,
|
||||||
Self::BINARY_QUANTIZED,
|
|
||||||
],
|
],
|
||||||
EmbedderSource::Ollama => &[
|
EmbedderSource::Ollama => &[
|
||||||
Self::SOURCE,
|
Self::SOURCE,
|
||||||
@@ -497,11 +421,8 @@ impl EmbeddingSettings {
|
|||||||
Self::API_KEY,
|
Self::API_KEY,
|
||||||
Self::DIMENSIONS,
|
Self::DIMENSIONS,
|
||||||
Self::DISTRIBUTION,
|
Self::DISTRIBUTION,
|
||||||
Self::BINARY_QUANTIZED,
|
|
||||||
],
|
],
|
||||||
EmbedderSource::UserProvided => {
|
EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION],
|
||||||
&[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION, Self::BINARY_QUANTIZED]
|
|
||||||
}
|
|
||||||
EmbedderSource::Rest => &[
|
EmbedderSource::Rest => &[
|
||||||
Self::SOURCE,
|
Self::SOURCE,
|
||||||
Self::API_KEY,
|
Self::API_KEY,
|
||||||
@@ -512,7 +433,6 @@ impl EmbeddingSettings {
|
|||||||
Self::RESPONSE,
|
Self::RESPONSE,
|
||||||
Self::HEADERS,
|
Self::HEADERS,
|
||||||
Self::DISTRIBUTION,
|
Self::DISTRIBUTION,
|
||||||
Self::BINARY_QUANTIZED,
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -566,7 +486,7 @@ impl std::fmt::Display for EmbedderSource {
|
|||||||
|
|
||||||
impl From<EmbeddingConfig> for EmbeddingSettings {
|
impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||||
fn from(value: EmbeddingConfig) -> Self {
|
fn from(value: EmbeddingConfig) -> Self {
|
||||||
let EmbeddingConfig { embedder_options, prompt, quantized } = value;
|
let EmbeddingConfig { embedder_options, prompt } = value;
|
||||||
let document_template_max_bytes =
|
let document_template_max_bytes =
|
||||||
Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get());
|
Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get());
|
||||||
match embedder_options {
|
match embedder_options {
|
||||||
@@ -587,7 +507,6 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
headers: Setting::NotSet,
|
headers: Setting::NotSet,
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
binary_quantized: Setting::some_or_not_set(quantized),
|
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
||||||
url,
|
url,
|
||||||
@@ -608,7 +527,6 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
headers: Setting::NotSet,
|
headers: Setting::NotSet,
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
binary_quantized: Setting::some_or_not_set(quantized),
|
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
||||||
embedding_model,
|
embedding_model,
|
||||||
@@ -629,7 +547,6 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
headers: Setting::NotSet,
|
headers: Setting::NotSet,
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
binary_quantized: Setting::some_or_not_set(quantized),
|
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
||||||
dimensions,
|
dimensions,
|
||||||
@@ -647,7 +564,6 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
headers: Setting::NotSet,
|
headers: Setting::NotSet,
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
binary_quantized: Setting::some_or_not_set(quantized),
|
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
||||||
api_key,
|
api_key,
|
||||||
@@ -670,7 +586,6 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
response: Setting::Set(response),
|
response: Setting::Set(response),
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
headers: Setting::Set(headers),
|
headers: Setting::Set(headers),
|
||||||
binary_quantized: Setting::some_or_not_set(quantized),
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -692,11 +607,8 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
binary_quantized,
|
|
||||||
} = value;
|
} = value;
|
||||||
|
|
||||||
this.quantized = binary_quantized.set();
|
|
||||||
|
|
||||||
if let Some(source) = source.set() {
|
if let Some(source) = source.set() {
|
||||||
match source {
|
match source {
|
||||||
EmbedderSource::OpenAi => {
|
EmbedderSource::OpenAi => {
|
||||||
|
|||||||
@@ -77,8 +77,7 @@
|
|||||||
"q": "puppy cute comforting movie",
|
"q": "puppy cute comforting movie",
|
||||||
"limit": 100,
|
"limit": 100,
|
||||||
"hybrid": {
|
"hybrid": {
|
||||||
"semanticRatio": 0.1,
|
"semanticRatio": 0.1
|
||||||
"embedder": "default"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -92,8 +91,7 @@
|
|||||||
"q": "puppy cute comforting movie",
|
"q": "puppy cute comforting movie",
|
||||||
"limit": 100,
|
"limit": 100,
|
||||||
"hybrid": {
|
"hybrid": {
|
||||||
"semanticRatio": 0.5,
|
"semanticRatio": 0.5
|
||||||
"embedder": "default"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -107,8 +105,7 @@
|
|||||||
"q": "puppy cute comforting movie",
|
"q": "puppy cute comforting movie",
|
||||||
"limit": 100,
|
"limit": 100,
|
||||||
"hybrid": {
|
"hybrid": {
|
||||||
"semanticRatio": 0.9,
|
"semanticRatio": 0.9
|
||||||
"embedder": "default"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -122,8 +119,7 @@
|
|||||||
"q": "puppy cute comforting movie",
|
"q": "puppy cute comforting movie",
|
||||||
"limit": 100,
|
"limit": 100,
|
||||||
"hybrid": {
|
"hybrid": {
|
||||||
"semanticRatio": 1.0,
|
"semanticRatio": 1.0
|
||||||
"embedder": "default"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -137,8 +133,7 @@
|
|||||||
"q": "shrek",
|
"q": "shrek",
|
||||||
"limit": 100,
|
"limit": 100,
|
||||||
"hybrid": {
|
"hybrid": {
|
||||||
"semanticRatio": 1.0,
|
"semanticRatio": 1.0
|
||||||
"embedder": "default"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -152,8 +147,7 @@
|
|||||||
"q": "shrek",
|
"q": "shrek",
|
||||||
"limit": 100,
|
"limit": 100,
|
||||||
"hybrid": {
|
"hybrid": {
|
||||||
"semanticRatio": 0.5,
|
"semanticRatio": 0.5
|
||||||
"embedder": "default"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -167,8 +161,7 @@
|
|||||||
"q": "shrek",
|
"q": "shrek",
|
||||||
"limit": 100,
|
"limit": 100,
|
||||||
"hybrid": {
|
"hybrid": {
|
||||||
"semanticRatio": 0.1,
|
"semanticRatio": 0.1
|
||||||
"embedder": "default"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||