mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-26 15:59:10 +00:00
Compare commits
42 Commits
control-be
...
bug-phrase
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e134d03a14 | ||
|
|
e78da35287 | ||
|
|
7f20c13f3f | ||
|
|
462a2329f1 | ||
|
|
f6483cf15d | ||
|
|
bd34ed01d9 | ||
|
|
74199f328d | ||
|
|
1113c42de0 | ||
|
|
465afe01b2 | ||
|
|
7d6768e4c4 | ||
|
|
f77661ec44 | ||
|
|
b8fd85a46d | ||
|
|
fd43c6c404 | ||
|
|
2564ec1496 | ||
|
|
b6b73fe41c | ||
|
|
6dde41cc46 | ||
|
|
163f8023a1 | ||
|
|
2b120b89e4 | ||
|
|
84f842233d | ||
|
|
633537ccd7 | ||
|
|
e8d7c00d30 | ||
|
|
3f6301dbc9 | ||
|
|
ca71b63ed1 | ||
|
|
2b6952eda1 | ||
|
|
79f29eed3c | ||
|
|
cc45e264ca | ||
|
|
5f474a640d | ||
|
|
bbaee3dbc6 | ||
|
|
877717cb26 | ||
|
|
716817122a | ||
|
|
1120a5296c | ||
|
|
a35a339c3d | ||
|
|
cac5836f6f | ||
|
|
5239ae0297 | ||
|
|
2fdb1d8018 | ||
|
|
3c5e363554 | ||
|
|
da0dd6febf | ||
|
|
e098cc8320 | ||
|
|
ec815fa368 | ||
|
|
4a922a176f | ||
|
|
51bc7b3173 | ||
|
|
dcb61f8b3a |
16
Cargo.lock
generated
16
Cargo.lock
generated
@@ -387,14 +387,14 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
|
||||
[[package]]
|
||||
name = "arroy"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
|
||||
source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
"heed",
|
||||
"log",
|
||||
"memmap2",
|
||||
"nohash",
|
||||
"ordered-float",
|
||||
"rand",
|
||||
"rayon",
|
||||
@@ -933,9 +933,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "charabia"
|
||||
version = "0.9.0"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03cd8f290cae94934cdd0103c14c2de9faf2d7d85be0d24d511af2bf1b14119d"
|
||||
checksum = "55ff52497324e7d168505a16949ae836c14595606fab94687238d2f6c8d4c798"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"csv",
|
||||
@@ -2838,7 +2838,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-targets 0.48.1",
|
||||
"windows-targets 0.52.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3686,6 +3686,12 @@ version = "0.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"
|
||||
|
||||
[[package]]
|
||||
name = "nohash"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
|
||||
@@ -45,14 +45,14 @@ See the list of all our example apps in our [demos repository](https://github.co
|
||||
## ✨ Features
|
||||
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
|
||||
- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
|
||||
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **Easy to install, deploy, and maintain**
|
||||
|
||||
@@ -255,6 +255,8 @@ pub(crate) mod test {
|
||||
}
|
||||
"###);
|
||||
|
||||
insta::assert_json_snapshot!(vector_index.settings().unwrap());
|
||||
|
||||
{
|
||||
let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
|
||||
let mut documents = documents.unwrap();
|
||||
|
||||
@@ -1,783 +1,56 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: document
|
||||
expression: vector_index.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"id": "e3",
|
||||
"desc": "overriden vector + map",
|
||||
"_vectors": {
|
||||
"default": [
|
||||
0.2,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1
|
||||
],
|
||||
"toto": [
|
||||
0.1
|
||||
]
|
||||
}
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"words",
|
||||
"typo",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"sort",
|
||||
"exactness"
|
||||
],
|
||||
"stopWords": [],
|
||||
"nonSeparatorTokens": [],
|
||||
"separatorTokens": [],
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
"oneTypo": 5,
|
||||
"twoTypos": 9
|
||||
},
|
||||
"disableOnWords": [],
|
||||
"disableOnAttributes": []
|
||||
},
|
||||
"faceting": {
|
||||
"maxValuesPerFacet": 100,
|
||||
"sortFacetValuesBy": {
|
||||
"*": "alpha"
|
||||
}
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
},
|
||||
"embedders": {
|
||||
"default": {
|
||||
"source": "huggingFace",
|
||||
"model": "BAAI/bge-base-en-v1.5",
|
||||
"revision": "617ca489d9e86b49b8167676d8220688b99db36e",
|
||||
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
|
||||
}
|
||||
},
|
||||
"searchCutoffMs": null
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,780 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: document
|
||||
---
|
||||
{
|
||||
"id": "e0",
|
||||
"desc": "overriden vector",
|
||||
"_vectors": {
|
||||
"default": [
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1,
|
||||
0.1
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -74,7 +74,8 @@ impl Display for IndexUidFormatError {
|
||||
f,
|
||||
"invalid index uid `{}`, the uid must be an integer \
|
||||
or a string containing only alphanumeric characters \
|
||||
a-z A-Z 0-9, hyphens - and underscores _.",
|
||||
a-z A-Z 0-9, hyphens - and underscores _, \
|
||||
and can not be more than 400 bytes.",
|
||||
self.invalid_uid,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -74,7 +74,8 @@ impl Display for IndexUidFormatError {
|
||||
f,
|
||||
"invalid index uid `{}`, the uid must be an integer \
|
||||
or a string containing only alphanumeric characters \
|
||||
a-z A-Z 0-9, hyphens - and underscores _.",
|
||||
a-z A-Z 0-9, hyphens - and underscores _, \
|
||||
and can not be more than 400 bytes.",
|
||||
self.invalid_uid,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ ureq = "2.10.0"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
arroy = "0.4.0"
|
||||
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
|
||||
big_s = "1.0.2"
|
||||
crossbeam = "0.8.4"
|
||||
insta = { version = "1.39.0", features = ["json", "redactions"] }
|
||||
|
||||
@@ -101,7 +101,7 @@ pub enum Error {
|
||||
)]
|
||||
InvalidTaskCanceledBy { canceled_by: String },
|
||||
#[error(
|
||||
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_)."
|
||||
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
|
||||
)]
|
||||
InvalidIndexUid { index_uid: String },
|
||||
#[error("Task `{0}` not found.")]
|
||||
|
||||
@@ -1477,7 +1477,7 @@ impl IndexScheduler {
|
||||
.map(
|
||||
|IndexEmbeddingConfig {
|
||||
name,
|
||||
config: milli::vector::EmbeddingConfig { embedder_options, prompt },
|
||||
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
|
||||
..
|
||||
}| {
|
||||
let prompt =
|
||||
@@ -1486,7 +1486,10 @@ impl IndexScheduler {
|
||||
{
|
||||
let embedders = self.embedders.read().unwrap();
|
||||
if let Some(embedder) = embedders.get(&embedder_options) {
|
||||
return Ok((name, (embedder.clone(), prompt)));
|
||||
return Ok((
|
||||
name,
|
||||
(embedder.clone(), prompt, quantized.unwrap_or_default()),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1500,7 +1503,7 @@ impl IndexScheduler {
|
||||
let mut embedders = self.embedders.write().unwrap();
|
||||
embedders.insert(embedder_options, embedder.clone());
|
||||
}
|
||||
Ok((name, (embedder, prompt)))
|
||||
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
@@ -5197,7 +5200,7 @@ mod tests {
|
||||
let simple_hf_name = name.clone();
|
||||
|
||||
let configs = index_scheduler.embedders(configs).unwrap();
|
||||
let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap();
|
||||
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
|
||||
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
|
||||
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
|
||||
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
|
||||
@@ -5519,6 +5522,7 @@ mod tests {
|
||||
400,
|
||||
),
|
||||
},
|
||||
quantized: None,
|
||||
},
|
||||
user_provided: RoaringBitmap<[1, 2]>,
|
||||
},
|
||||
@@ -5531,28 +5535,8 @@ mod tests {
|
||||
|
||||
// the document with the id 3 should keep its original embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
||||
let mut embeddings = Vec::new();
|
||||
|
||||
'vectors: for i in 0..=u8::MAX {
|
||||
let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
|
||||
.map(Some)
|
||||
.or_else(|e| match e {
|
||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||
e => Err(e),
|
||||
})
|
||||
.transpose();
|
||||
|
||||
let Some(reader) = reader else {
|
||||
break 'vectors;
|
||||
};
|
||||
|
||||
let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap();
|
||||
if let Some(embedding) = embedding {
|
||||
embeddings.push(embedding)
|
||||
} else {
|
||||
break 'vectors;
|
||||
}
|
||||
}
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embeddings = &embeddings["my_doggo_embedder"];
|
||||
|
||||
snapshot!(embeddings.len(), @"1");
|
||||
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
|
||||
@@ -5737,6 +5721,7 @@ mod tests {
|
||||
400,
|
||||
),
|
||||
},
|
||||
quantized: None,
|
||||
},
|
||||
user_provided: RoaringBitmap<[0]>,
|
||||
},
|
||||
@@ -5780,6 +5765,7 @@ mod tests {
|
||||
400,
|
||||
),
|
||||
},
|
||||
quantized: None,
|
||||
},
|
||||
user_provided: RoaringBitmap<[]>,
|
||||
},
|
||||
|
||||
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
|
||||
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
|
||||
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
||||
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
||||
@@ -66,3 +66,5 @@ khmer = ["milli/khmer"]
|
||||
vietnamese = ["milli/vietnamese"]
|
||||
# force swedish character recomposition
|
||||
swedish-recomposition = ["milli/swedish-recomposition"]
|
||||
# force german character recomposition
|
||||
german = ["milli/german"]
|
||||
|
||||
@@ -395,7 +395,10 @@ impl ErrorCode for milli::Error {
|
||||
| UserError::InvalidSettingsDimensions { .. }
|
||||
| UserError::InvalidUrl { .. }
|
||||
| UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
|
||||
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
|
||||
| UserError::InvalidPrompt(_)
|
||||
| UserError::InvalidDisableBinaryQuantization { .. } => {
|
||||
Code::InvalidSettingsEmbedders
|
||||
}
|
||||
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
||||
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
||||
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
|
||||
@@ -540,7 +543,8 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
|
||||
f,
|
||||
"the value of `id` is invalid. \
|
||||
A document identifier can be of type integer or string, \
|
||||
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."
|
||||
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
|
||||
and can not be more than 512 bytes."
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,7 +88,8 @@ impl fmt::Display for IndexUidFormatError {
|
||||
f,
|
||||
"`{}` is not a valid index uid. Index uid can be an \
|
||||
integer or a string containing only alphanumeric \
|
||||
characters, hyphens (-) and underscores (_).",
|
||||
characters, hyphens (-) and underscores (_), \
|
||||
and can not be more than 512 bytes.",
|
||||
self.invalid_uid,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -39,12 +39,14 @@ macro_rules! make_locale {
|
||||
pub enum Locale {
|
||||
$($iso_639_1,)+
|
||||
$($iso_639_3,)+
|
||||
Cmn,
|
||||
}
|
||||
|
||||
impl From<milli::tokenizer::Language> for Locale {
|
||||
fn from(other: milli::tokenizer::Language) -> Locale {
|
||||
match other {
|
||||
$(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+
|
||||
milli::tokenizer::Language::Cmn => Locale::Cmn,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -54,6 +56,7 @@ macro_rules! make_locale {
|
||||
match other {
|
||||
$(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+
|
||||
$(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+
|
||||
Locale::Cmn => milli::tokenizer::Language::Cmn,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -65,6 +68,7 @@ macro_rules! make_locale {
|
||||
let locale = match s {
|
||||
$($iso_639_1_str => Locale::$iso_639_1,)+
|
||||
$($iso_639_3_str => Locale::$iso_639_3,)+
|
||||
"cmn" => Locale::Cmn,
|
||||
_ => return Err(LocaleFormatError { invalid_locale: s.to_string() }),
|
||||
};
|
||||
|
||||
@@ -79,8 +83,9 @@ macro_rules! make_locale {
|
||||
|
||||
impl std::fmt::Display for LocaleFormatError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+].join(", ");
|
||||
write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales)
|
||||
let mut valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+,"cmn"];
|
||||
valid_locales.sort_by(|left, right| left.len().cmp(&right.len()).then(left.cmp(right)));
|
||||
write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales.join(", "))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,7 +104,6 @@ make_locale!(
|
||||
(Bg, "bg") => (Bul, "bul"),
|
||||
(Ca, "ca") => (Cat, "cat"),
|
||||
(Cs, "cs") => (Ces, "ces"),
|
||||
(Zh, "zh") => (Cmn, "cmn"),
|
||||
(Da, "da") => (Dan, "dan"),
|
||||
(De, "de") => (Deu, "deu"),
|
||||
(El, "el") => (Ell, "ell"),
|
||||
@@ -157,5 +161,6 @@ make_locale!(
|
||||
(Uz, "uz") => (Uzb, "uzb"),
|
||||
(Vi, "vi") => (Vie, "vie"),
|
||||
(Yi, "yi") => (Yid, "yid"),
|
||||
(Zh, "zh") => (Zho, "zho"),
|
||||
(Zu, "zu") => (Zul, "zul"),
|
||||
);
|
||||
|
||||
@@ -153,6 +153,7 @@ greek = ["meilisearch-types/greek"]
|
||||
khmer = ["meilisearch-types/khmer"]
|
||||
vietnamese = ["meilisearch-types/vietnamese"]
|
||||
swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
|
||||
german = ["meilisearch-types/german"]
|
||||
|
||||
[package.metadata.mini-dashboard]
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"
|
||||
|
||||
@@ -646,8 +646,6 @@ pub struct SearchAggregator {
|
||||
max_vector_size: usize,
|
||||
// Whether the semantic ratio passed to a hybrid search equals the default ratio.
|
||||
semantic_ratio: bool,
|
||||
// Whether a non-default embedder was specified
|
||||
embedder: bool,
|
||||
hybrid: bool,
|
||||
retrieve_vectors: bool,
|
||||
|
||||
@@ -795,7 +793,6 @@ impl SearchAggregator {
|
||||
|
||||
if let Some(hybrid) = hybrid {
|
||||
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
|
||||
ret.embedder = hybrid.embedder.is_some();
|
||||
ret.hybrid = true;
|
||||
}
|
||||
|
||||
@@ -863,7 +860,6 @@ impl SearchAggregator {
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
embedder,
|
||||
hybrid,
|
||||
total_degraded,
|
||||
total_used_negative_operator,
|
||||
@@ -923,7 +919,6 @@ impl SearchAggregator {
|
||||
self.retrieve_vectors |= retrieve_vectors;
|
||||
self.semantic_ratio |= semantic_ratio;
|
||||
self.hybrid |= hybrid;
|
||||
self.embedder |= embedder;
|
||||
|
||||
// pagination
|
||||
self.max_limit = self.max_limit.max(max_limit);
|
||||
@@ -999,7 +994,6 @@ impl SearchAggregator {
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
embedder,
|
||||
hybrid,
|
||||
total_degraded,
|
||||
total_used_negative_operator,
|
||||
@@ -1051,7 +1045,6 @@ impl SearchAggregator {
|
||||
"hybrid": {
|
||||
"enabled": hybrid,
|
||||
"semantic_ratio": semantic_ratio,
|
||||
"embedder": embedder,
|
||||
},
|
||||
"pagination": {
|
||||
"max_limit": max_limit,
|
||||
@@ -1782,7 +1775,6 @@ pub struct SimilarAggregator {
|
||||
used_syntax: HashMap<String, usize>,
|
||||
|
||||
// Whether a non-default embedder was specified
|
||||
embedder: bool,
|
||||
retrieve_vectors: bool,
|
||||
|
||||
// pagination
|
||||
@@ -1803,7 +1795,7 @@ impl SimilarAggregator {
|
||||
pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self {
|
||||
let SimilarQuery {
|
||||
id: _,
|
||||
embedder,
|
||||
embedder: _,
|
||||
offset,
|
||||
limit,
|
||||
attributes_to_retrieve: _,
|
||||
@@ -1851,7 +1843,6 @@ impl SimilarAggregator {
|
||||
ret.show_ranking_score_details = *show_ranking_score_details;
|
||||
ret.ranking_score_threshold = ranking_score_threshold.is_some();
|
||||
|
||||
ret.embedder = embedder.is_some();
|
||||
ret.retrieve_vectors = *retrieve_vectors;
|
||||
|
||||
ret
|
||||
@@ -1883,7 +1874,6 @@ impl SimilarAggregator {
|
||||
max_attributes_to_retrieve,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
embedder,
|
||||
ranking_score_threshold,
|
||||
retrieve_vectors,
|
||||
} = other;
|
||||
@@ -1914,7 +1904,6 @@ impl SimilarAggregator {
|
||||
*used_syntax = used_syntax.saturating_add(value);
|
||||
}
|
||||
|
||||
self.embedder |= embedder;
|
||||
self.retrieve_vectors |= retrieve_vectors;
|
||||
|
||||
// pagination
|
||||
@@ -1948,7 +1937,6 @@ impl SimilarAggregator {
|
||||
max_attributes_to_retrieve,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
embedder,
|
||||
ranking_score_threshold,
|
||||
retrieve_vectors,
|
||||
} = self;
|
||||
@@ -1980,9 +1968,6 @@ impl SimilarAggregator {
|
||||
"vector": {
|
||||
"retrieve_vectors": retrieve_vectors,
|
||||
},
|
||||
"hybrid": {
|
||||
"embedder": embedder,
|
||||
},
|
||||
"pagination": {
|
||||
"max_limit": max_limit,
|
||||
"max_offset": max_offset,
|
||||
|
||||
@@ -72,7 +72,7 @@ pub enum MeilisearchHttpError {
|
||||
DocumentFormat(#[from] DocumentFormatError),
|
||||
#[error(transparent)]
|
||||
Join(#[from] JoinError),
|
||||
#[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
|
||||
#[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
|
||||
MissingSearchHybrid,
|
||||
}
|
||||
|
||||
|
||||
@@ -128,8 +128,10 @@ impl std::ops::Deref for SemanticRatioGet {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SearchQueryGet> for SearchQuery {
|
||||
fn from(other: SearchQueryGet) -> Self {
|
||||
impl TryFrom<SearchQueryGet> for SearchQuery {
|
||||
type Error = ResponseError;
|
||||
|
||||
fn try_from(other: SearchQueryGet) -> Result<Self, Self::Error> {
|
||||
let filter = match other.filter {
|
||||
Some(f) => match serde_json::from_str(&f) {
|
||||
Ok(v) => Some(v),
|
||||
@@ -140,19 +142,28 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
|
||||
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
|
||||
(None, None) => None,
|
||||
(None, Some(semantic_ratio)) => {
|
||||
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None })
|
||||
(None, Some(_)) => {
|
||||
return Err(ResponseError::from_msg(
|
||||
"`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(),
|
||||
meilisearch_types::error::Code::InvalidHybridQuery,
|
||||
));
|
||||
}
|
||||
(Some(embedder), None) => {
|
||||
Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder })
|
||||
}
|
||||
(Some(embedder), None) => Some(HybridQuery {
|
||||
semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
|
||||
embedder: Some(embedder),
|
||||
}),
|
||||
(Some(embedder), Some(semantic_ratio)) => {
|
||||
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) })
|
||||
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder })
|
||||
}
|
||||
};
|
||||
|
||||
Self {
|
||||
if other.vector.is_some() && hybrid.is_none() {
|
||||
return Err(ResponseError::from_msg(
|
||||
"`hybridEmbedder` is mandatory when `vector` is present".into(),
|
||||
meilisearch_types::error::Code::MissingSearchHybrid,
|
||||
));
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
q: other.q,
|
||||
vector: other.vector.map(CS::into_inner),
|
||||
offset: other.offset.0,
|
||||
@@ -179,7 +190,7 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
hybrid,
|
||||
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
|
||||
locales: other.locales.map(|o| o.into_iter().collect()),
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -219,7 +230,7 @@ pub async fn search_with_url_query(
|
||||
debug!(parameters = ?params, "Search get");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let mut query: SearchQuery = params.into_inner().into();
|
||||
let mut query: SearchQuery = params.into_inner().try_into()?;
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
@@ -312,44 +323,36 @@ pub fn search_kind(
|
||||
features.check_vector("Passing `hybrid` as a parameter")?;
|
||||
}
|
||||
|
||||
// regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing
|
||||
if query.vector.is_none() {
|
||||
match &query.q {
|
||||
Some(q) if q.trim().is_empty() => return Ok(SearchKind::KeywordOnly),
|
||||
None => return Ok(SearchKind::KeywordOnly),
|
||||
_ => {}
|
||||
// handle with care, the order of cases matters, the semantics is subtle
|
||||
match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
|
||||
// empty query, no vector => placeholder search
|
||||
(Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
|
||||
// no query, no vector => placeholder search
|
||||
(None, _, None) => Ok(SearchKind::KeywordOnly),
|
||||
// hybrid.semantic_ratio == 1.0 => vector
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
|
||||
SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
}
|
||||
|
||||
match &query.hybrid {
|
||||
Some(HybridQuery { semantic_ratio, embedder }) if **semantic_ratio == 1.0 => {
|
||||
Ok(SearchKind::semantic(
|
||||
index_scheduler,
|
||||
index,
|
||||
embedder.as_deref(),
|
||||
query.vector.as_ref().map(Vec::len),
|
||||
)?)
|
||||
}
|
||||
Some(HybridQuery { semantic_ratio, embedder: _ }) if **semantic_ratio == 0.0 => {
|
||||
// hybrid.semantic_ratio == 0.0 => keyword
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
|
||||
Ok(SearchKind::KeywordOnly)
|
||||
}
|
||||
Some(HybridQuery { semantic_ratio, embedder }) => Ok(SearchKind::hybrid(
|
||||
// no query, hybrid, vector => semantic
|
||||
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
|
||||
SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
|
||||
}
|
||||
// query, no hybrid, no vector => keyword
|
||||
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
|
||||
// query, hybrid, maybe vector => hybrid
|
||||
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
|
||||
index_scheduler,
|
||||
index,
|
||||
embedder.as_deref(),
|
||||
embedder,
|
||||
**semantic_ratio,
|
||||
query.vector.as_ref().map(Vec::len),
|
||||
)?),
|
||||
None => match (query.q.as_deref(), query.vector.as_deref()) {
|
||||
(_query, None) => Ok(SearchKind::KeywordOnly),
|
||||
(None, Some(_vector)) => Ok(SearchKind::semantic(
|
||||
index_scheduler,
|
||||
index,
|
||||
None,
|
||||
query.vector.as_ref().map(Vec::len),
|
||||
)?),
|
||||
(Some(_), Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
},
|
||||
v.map(|v| v.len()),
|
||||
),
|
||||
|
||||
(_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -643,12 +643,19 @@ fn embedder_analytics(
|
||||
.max()
|
||||
});
|
||||
|
||||
let binary_quantization_used = setting.as_ref().map(|map| {
|
||||
map.values()
|
||||
.filter_map(|config| config.clone().set())
|
||||
.any(|config| config.binary_quantized.set().is_some())
|
||||
});
|
||||
|
||||
json!(
|
||||
{
|
||||
"total": setting.as_ref().map(|s| s.len()),
|
||||
"sources": sources,
|
||||
"document_template_used": document_template_used,
|
||||
"document_template_max_bytes": document_template_max_bytes
|
||||
"document_template_max_bytes": document_template_max_bytes,
|
||||
"binary_quantization_used": binary_quantization_used,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -102,8 +102,8 @@ async fn similar(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
|
||||
let (embedder_name, embedder) =
|
||||
SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
|
||||
let (embedder_name, embedder, quantized) =
|
||||
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
perform_similar(
|
||||
@@ -111,6 +111,7 @@ async fn similar(
|
||||
query,
|
||||
embedder_name,
|
||||
embedder,
|
||||
quantized,
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
)
|
||||
@@ -139,8 +140,8 @@ pub struct SimilarQueryGet {
|
||||
show_ranking_score_details: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
|
||||
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
|
||||
pub embedder: Option<String>,
|
||||
#[deserr(error = DeserrQueryParamError<InvalidEmbedder>)]
|
||||
pub embedder: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
|
||||
@@ -616,7 +616,7 @@ mod tests {
|
||||
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
|
||||
snapshot!(meili_snap::json_string!(err), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `indexUids[1]`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value in parameter `indexUids[1]`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
@@ -628,7 +628,7 @@ mod tests {
|
||||
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
|
||||
snapshot!(meili_snap::json_string!(err), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `indexUids`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value in parameter `indexUids`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
|
||||
@@ -267,58 +267,54 @@ impl fmt::Debug for SearchQuery {
|
||||
pub struct HybridQuery {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
|
||||
pub semantic_ratio: SemanticRatio,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
|
||||
pub embedder: Option<String>,
|
||||
#[deserr(error = DeserrJsonError<InvalidEmbedder>)]
|
||||
pub embedder: String,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum SearchKind {
|
||||
KeywordOnly,
|
||||
SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
|
||||
Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
|
||||
SemanticOnly { embedder_name: String, embedder: Arc<Embedder>, quantized: bool },
|
||||
Hybrid { embedder_name: String, embedder: Arc<Embedder>, quantized: bool, semantic_ratio: f32 },
|
||||
}
|
||||
|
||||
impl SearchKind {
|
||||
pub(crate) fn semantic(
|
||||
index_scheduler: &index_scheduler::IndexScheduler,
|
||||
index: &Index,
|
||||
embedder_name: Option<&str>,
|
||||
embedder_name: &str,
|
||||
vector_len: Option<usize>,
|
||||
) -> Result<Self, ResponseError> {
|
||||
let (embedder_name, embedder) =
|
||||
let (embedder_name, embedder, quantized) =
|
||||
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
|
||||
Ok(Self::SemanticOnly { embedder_name, embedder })
|
||||
Ok(Self::SemanticOnly { embedder_name, embedder, quantized })
|
||||
}
|
||||
|
||||
pub(crate) fn hybrid(
|
||||
index_scheduler: &index_scheduler::IndexScheduler,
|
||||
index: &Index,
|
||||
embedder_name: Option<&str>,
|
||||
embedder_name: &str,
|
||||
semantic_ratio: f32,
|
||||
vector_len: Option<usize>,
|
||||
) -> Result<Self, ResponseError> {
|
||||
let (embedder_name, embedder) =
|
||||
let (embedder_name, embedder, quantized) =
|
||||
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
|
||||
Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio })
|
||||
Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio })
|
||||
}
|
||||
|
||||
pub(crate) fn embedder(
|
||||
index_scheduler: &index_scheduler::IndexScheduler,
|
||||
index: &Index,
|
||||
embedder_name: Option<&str>,
|
||||
embedder_name: &str,
|
||||
vector_len: Option<usize>,
|
||||
) -> Result<(String, Arc<Embedder>), ResponseError> {
|
||||
) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
|
||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
||||
|
||||
let embedder_name = embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name());
|
||||
|
||||
let embedder = embedders.get(embedder_name);
|
||||
|
||||
let embedder = embedder
|
||||
let (embedder, _, quantized) = embedders
|
||||
.get(embedder_name)
|
||||
.ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned()))
|
||||
.map_err(milli::Error::from)?
|
||||
.0;
|
||||
.map_err(milli::Error::from)?;
|
||||
|
||||
if let Some(vector_len) = vector_len {
|
||||
if vector_len != embedder.dimensions() {
|
||||
@@ -332,7 +328,7 @@ impl SearchKind {
|
||||
}
|
||||
}
|
||||
|
||||
Ok((embedder_name.to_owned(), embedder))
|
||||
Ok((embedder_name.to_owned(), embedder, quantized))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -538,8 +534,8 @@ pub struct SimilarQuery {
|
||||
pub limit: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)]
|
||||
pub filter: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
|
||||
pub embedder: Option<String>,
|
||||
#[deserr(error = DeserrJsonError<InvalidEmbedder>)]
|
||||
pub embedder: String,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
|
||||
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
|
||||
@@ -793,7 +789,7 @@ fn prepare_search<'t>(
|
||||
search.query(q);
|
||||
}
|
||||
}
|
||||
SearchKind::SemanticOnly { embedder_name, embedder } => {
|
||||
SearchKind::SemanticOnly { embedder_name, embedder, quantized } => {
|
||||
let vector = match query.vector.clone() {
|
||||
Some(vector) => vector,
|
||||
None => {
|
||||
@@ -807,14 +803,19 @@ fn prepare_search<'t>(
|
||||
}
|
||||
};
|
||||
|
||||
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
|
||||
search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector));
|
||||
}
|
||||
SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => {
|
||||
SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => {
|
||||
if let Some(q) = &query.q {
|
||||
search.query(q);
|
||||
}
|
||||
// will be embedded in hybrid search if necessary
|
||||
search.semantic(embedder_name.clone(), embedder.clone(), query.vector.clone());
|
||||
search.semantic(
|
||||
embedder_name.clone(),
|
||||
embedder.clone(),
|
||||
*quantized,
|
||||
query.vector.clone(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1443,6 +1444,7 @@ pub fn perform_similar(
|
||||
query: SimilarQuery,
|
||||
embedder_name: String,
|
||||
embedder: Arc<Embedder>,
|
||||
quantized: bool,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
features: RoFeatures,
|
||||
) -> Result<SimilarResult, ResponseError> {
|
||||
@@ -1471,8 +1473,16 @@ pub fn perform_similar(
|
||||
));
|
||||
};
|
||||
|
||||
let mut similar =
|
||||
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);
|
||||
let mut similar = milli::Similar::new(
|
||||
internal_id,
|
||||
offset,
|
||||
limit,
|
||||
index,
|
||||
&rtxn,
|
||||
embedder_name,
|
||||
embedder,
|
||||
quantized,
|
||||
);
|
||||
|
||||
if let Some(ref filter) = query.filter {
|
||||
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
|
||||
|
||||
@@ -1023,7 +1023,7 @@ async fn error_document_add_create_index_bad_uid() {
|
||||
snapshot!(json_string!(response),
|
||||
@r###"
|
||||
{
|
||||
"message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
@@ -1280,7 +1280,7 @@ async fn error_add_documents_bad_document_id() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
|
||||
"message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_document_id",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
|
||||
|
||||
@@ -11,7 +11,7 @@ async fn error_document_update_create_index_bad_uid() {
|
||||
let (response, code) = index.update_documents(json!([{"id": 1}]), None).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
@@ -173,7 +173,7 @@ async fn error_update_documents_bad_document_id() {
|
||||
assert_eq!(
|
||||
response["error"]["message"],
|
||||
json!(
|
||||
r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."#
|
||||
r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."#
|
||||
)
|
||||
);
|
||||
assert_eq!(response["error"]["code"], json!("invalid_document_id"));
|
||||
|
||||
@@ -125,11 +125,11 @@ async fn create_index_with_primary_key() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn create_index_with_invalid_primary_key() {
|
||||
let document = json!([ { "id": 2, "title": "Pride and Prejudice" } ]);
|
||||
let documents = json!([ { "id": 2, "title": "Pride and Prejudice" } ]);
|
||||
|
||||
let server = Server::new().await;
|
||||
let index = server.index("movies");
|
||||
let (_response, code) = index.add_documents(document, Some("title")).await;
|
||||
let (_response, code) = index.add_documents(documents, Some("title")).await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_task(0).await;
|
||||
@@ -137,6 +137,17 @@ async fn create_index_with_invalid_primary_key() {
|
||||
let (response, code) = index.get().await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["primaryKey"], json!(null));
|
||||
|
||||
let documents = json!([ { "id": "e".repeat(513) } ]);
|
||||
|
||||
let (_response, code) = index.add_documents(documents, Some("id")).await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = index.get().await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["primaryKey"], json!(null));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@@ -192,7 +203,7 @@ async fn error_create_with_invalid_index_uid() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value at `.uid`: `test test#!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value at `.uid`: `test test#!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
|
||||
@@ -75,7 +75,7 @@ async fn create_index_bad_uid() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value at `.uid`: `the best doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value at `.uid`: `the best doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
@@ -136,7 +136,7 @@ async fn get_index_bad_uid() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
@@ -232,7 +232,7 @@ async fn update_index_bad_uid() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
@@ -247,7 +247,7 @@ async fn delete_index_bad_uid() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
|
||||
@@ -186,7 +186,7 @@ async fn get_invalid_index_uid() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "`this is not a valid index name` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "`this is not a valid index name` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
|
||||
@@ -128,7 +128,7 @@ async fn simple_search() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -137,7 +137,7 @@ async fn simple_search() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -146,7 +146,7 @@ async fn simple_search() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -161,7 +161,7 @@ async fn limit_offset() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -174,7 +174,7 @@ async fn limit_offset() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -188,8 +188,11 @@ async fn simple_search_hf() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||
|
||||
let (response, code) =
|
||||
index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await;
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"0");
|
||||
@@ -197,7 +200,7 @@ async fn simple_search_hf() {
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
// disable ranking score as the vectors between architectures are not equal
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}),
|
||||
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -206,7 +209,7 @@ async fn simple_search_hf() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}),
|
||||
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -215,7 +218,7 @@ async fn simple_search_hf() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
|
||||
json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -224,7 +227,7 @@ async fn simple_search_hf() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
|
||||
json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -237,7 +240,7 @@ async fn distribution_shift() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
|
||||
|
||||
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true});
|
||||
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true});
|
||||
let (response, code) = index.search_post(search.clone()).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
|
||||
@@ -271,7 +274,7 @@ async fn highlighter() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 0.2},
|
||||
"hybrid": {"embedder": "default", "semanticRatio": 0.2},
|
||||
"retrieveVectors": true,
|
||||
"attributesToHighlight": [
|
||||
"desc",
|
||||
@@ -287,7 +290,7 @@ async fn highlighter() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 0.8},
|
||||
"hybrid": {"embedder": "default", "semanticRatio": 0.8},
|
||||
"retrieveVectors": true,
|
||||
"showRankingScore": true,
|
||||
"attributesToHighlight": [
|
||||
@@ -304,7 +307,7 @@ async fn highlighter() {
|
||||
// no highlighting on full semantic
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 1.0},
|
||||
"hybrid": {"embedder": "default", "semanticRatio": 1.0},
|
||||
"retrieveVectors": true,
|
||||
"showRankingScore": true,
|
||||
"attributesToHighlight": [
|
||||
@@ -326,7 +329,7 @@ async fn invalid_semantic_ratio() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}),
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
@@ -341,7 +344,7 @@ async fn invalid_semantic_ratio() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}),
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
@@ -357,7 +360,7 @@ async fn invalid_semantic_ratio() {
|
||||
let (response, code) = index
|
||||
.search_get(
|
||||
&yaup::to_string(
|
||||
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}),
|
||||
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}),
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
@@ -375,7 +378,7 @@ async fn invalid_semantic_ratio() {
|
||||
let (response, code) = index
|
||||
.search_get(
|
||||
&yaup::to_string(
|
||||
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}),
|
||||
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": -0.2}),
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
@@ -398,7 +401,7 @@ async fn single_document() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -414,7 +417,7 @@ async fn query_combination() {
|
||||
|
||||
// search without query and vector, but with hybrid => still placeholder
|
||||
let (response, code) = index
|
||||
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -423,7 +426,7 @@ async fn query_combination() {
|
||||
|
||||
// same with a different semantic ratio
|
||||
let (response, code) = index
|
||||
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -432,7 +435,7 @@ async fn query_combination() {
|
||||
|
||||
// wrong vector dimensions
|
||||
let (response, code) = index
|
||||
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
@@ -447,7 +450,7 @@ async fn query_combination() {
|
||||
|
||||
// full vector
|
||||
let (response, code) = index
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -456,7 +459,7 @@ async fn query_combination() {
|
||||
|
||||
// full keyword, without a query
|
||||
let (response, code) = index
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -465,7 +468,7 @@ async fn query_combination() {
|
||||
|
||||
// query + vector, full keyword => keyword
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -480,7 +483,7 @@ async fn query_combination() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.",
|
||||
"message": "Invalid request: missing `hybrid` parameter when `vector` is present.",
|
||||
"code": "missing_search_hybrid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_search_hybrid"
|
||||
@@ -490,7 +493,7 @@ async fn query_combination() {
|
||||
// full vector, without a vector => error
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -507,7 +510,7 @@ async fn query_combination() {
|
||||
// hybrid without a vector => full keyword
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -523,7 +526,7 @@ async fn retrieve_vectors() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
|
||||
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -573,7 +576,7 @@ async fn retrieve_vectors() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
|
||||
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
|
||||
@@ -922,7 +922,7 @@ async fn invalid_locales() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`",
|
||||
"message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`",
|
||||
"code": "invalid_search_locales",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_locales"
|
||||
@@ -935,7 +935,7 @@ async fn invalid_locales() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bn, bg, ca, cs, zh, da, de, el, en, eo, et, fi, fr, gu, he, hi, hr, hu, hy, id, it, jv, ja, kn, ka, km, ko, la, lv, lt, ml, mr, mk, my, ne, nl, nb, or, pa, fa, pl, pt, ro, ru, si, sk, sl, sn, es, sr, sv, ta, te, tl, th, tk, tr, uk, ur, uz, vi, yi, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zul",
|
||||
"message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bg, bn, ca, cs, da, de, el, en, eo, es, et, fa, fi, fr, gu, he, hi, hr, hu, hy, id, it, ja, jv, ka, km, kn, ko, la, lt, lv, mk, ml, mr, my, nb, ne, nl, or, pa, pl, pt, ro, ru, si, sk, sl, sn, sr, sv, ta, te, th, tk, tl, tr, uk, ur, uz, vi, yi, zh, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zho, zul",
|
||||
"code": "invalid_search_locales",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_locales"
|
||||
@@ -957,7 +957,7 @@ async fn invalid_localized_attributes_rules() {
|
||||
.await;
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`",
|
||||
"message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`",
|
||||
"code": "invalid_settings_localized_attributes",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes"
|
||||
@@ -1143,3 +1143,195 @@ async fn facet_search_with_localized_attributes() {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn swedish_search() {
|
||||
let server = Server::new().await;
|
||||
|
||||
let index = server.index("test");
|
||||
let documents = json!([
|
||||
{"id": "tra1-1", "product": "trä"},
|
||||
{"id": "tra2-1", "product": "traktor"},
|
||||
{"id": "tra1-2", "product": "träbjälke"},
|
||||
{"id": "tra2-2", "product": "trafiksignal"},
|
||||
]);
|
||||
index.add_documents(documents, None).await;
|
||||
let (_response, _) = index
|
||||
.update_settings(json!({
|
||||
"searchableAttributes": ["product"],
|
||||
"localizedAttributes": [
|
||||
// force swedish
|
||||
{"attributePatterns": ["product"], "locales": ["swe"]}
|
||||
]
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
// infer swedish
|
||||
index
|
||||
.search(json!({"q": "trä", "attributesToRetrieve": ["product"]}), |response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"product": "trä"
|
||||
},
|
||||
{
|
||||
"product": "träbjälke"
|
||||
}
|
||||
],
|
||||
"query": "trä",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 2
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"200 OK");
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "tra", "attributesToRetrieve": ["product"]}), |response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"product": "traktor"
|
||||
},
|
||||
{
|
||||
"product": "trafiksignal"
|
||||
}
|
||||
],
|
||||
"query": "tra",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 2
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"200 OK");
|
||||
})
|
||||
.await;
|
||||
|
||||
// force swedish
|
||||
index
|
||||
.search(
|
||||
json!({"q": "trä", "locales": ["swe"], "attributesToRetrieve": ["product"]}),
|
||||
|response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"product": "trä"
|
||||
},
|
||||
{
|
||||
"product": "träbjälke"
|
||||
}
|
||||
],
|
||||
"query": "trä",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 2
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"200 OK");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
index
|
||||
.search(
|
||||
json!({"q": "tra", "locales": ["swe"], "attributesToRetrieve": ["product"]}),
|
||||
|response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"product": "traktor"
|
||||
},
|
||||
{
|
||||
"product": "trafiksignal"
|
||||
}
|
||||
],
|
||||
"query": "tra",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 2
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"200 OK");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn german_search() {
|
||||
let server = Server::new().await;
|
||||
|
||||
let index = server.index("test");
|
||||
let documents = json!([
|
||||
{"id": 1, "product": "Interkulturalität"},
|
||||
{"id": 2, "product": "Wissensorganisation"},
|
||||
]);
|
||||
index.add_documents(documents, None).await;
|
||||
let (_response, _) = index
|
||||
.update_settings(json!({
|
||||
"searchableAttributes": ["product"],
|
||||
"localizedAttributes": [
|
||||
// force swedish
|
||||
{"attributePatterns": ["product"], "locales": ["deu"]}
|
||||
]
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
// infer swedish
|
||||
index
|
||||
.search(
|
||||
json!({"q": "kulturalität", "attributesToRetrieve": ["product"]}),
|
||||
|response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"product": "Interkulturalität"
|
||||
}
|
||||
],
|
||||
"query": "kulturalität",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"200 OK");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({"q": "organisation", "attributesToRetrieve": ["product"]}),
|
||||
|response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"product": "Wissensorganisation"
|
||||
}
|
||||
],
|
||||
"query": "organisation",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"200 OK");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -1099,22 +1099,28 @@ async fn experimental_feature_vector_store() {
|
||||
index.add_documents(json!(documents), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
index
|
||||
.search(json!({
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"vector": [1.0, 2.0, 3.0],
|
||||
"hybrid": {
|
||||
"embedder": "manual",
|
||||
},
|
||||
"showRankingScore": true
|
||||
}), |response, code|{
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
})
|
||||
}))
|
||||
.await;
|
||||
|
||||
{
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
index
|
||||
.search(json!({
|
||||
"retrieveVectors": true,
|
||||
@@ -1162,6 +1168,9 @@ async fn experimental_feature_vector_store() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"vector": [1.0, 2.0, 3.0],
|
||||
"hybrid": {
|
||||
"embedder": "manual",
|
||||
},
|
||||
"showRankingScore": true,
|
||||
"retrieveVectors": true,
|
||||
}))
|
||||
|
||||
@@ -412,7 +412,7 @@ async fn simple_search_illegal_index_uid() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
insta::assert_json_snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
@@ -437,7 +437,7 @@ async fn federation_search_illegal_index_uid() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
insta::assert_json_snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
|
||||
@@ -367,3 +367,50 @@ async fn search_on_exact_field() {
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn phrase_search_on_title() {
|
||||
let server = Server::new().await;
|
||||
let documents = json!([
|
||||
{ "id": 8, "desc": "Document Review", "title": "Document Review Specialist II" },
|
||||
{ "id": 5, "desc": "Document Review", "title": "Document Review Attorney" },
|
||||
{ "id": 4, "desc": "Document Review", "title": "Document Review Manager - Cyber Incident Response (Remote)" },
|
||||
{ "id": 3, "desc": "Document Review", "title": "Document Review Paralegal" },
|
||||
{ "id": 2, "desc": "Document Review", "title": "Document Controller (Saudi National)" },
|
||||
{ "id": 1, "desc": "Document Review", "title": "Document Reviewer" },
|
||||
{ "id": 7, "desc": "Document Review", "title": "Document Review Specialist II" },
|
||||
{ "id": 6, "desc": "Document Review", "title": "Document Review (Entry Level)" }
|
||||
]);
|
||||
let index = index_with_documents(&server, &documents).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({"q": "\"Document Review\"", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["title"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Document Review Specialist II"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Attorney"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Manager - Cyber Incident Response (Remote)"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Paralegal"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Specialist II"
|
||||
},
|
||||
{
|
||||
"title": "Document Review (Entry Level)"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -330,7 +330,7 @@ async fn error_update_setting_unexisting_index_invalid_uid() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "`test##! ` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "`test##! ` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
|
||||
@@ -18,7 +18,7 @@ async fn similar_unexisting_index() {
|
||||
});
|
||||
|
||||
index
|
||||
.similar(json!({"id": 287947}), |response, code| {
|
||||
.similar(json!({"id": 287947, "embedder": "manual"}), |response, code| {
|
||||
assert_eq!(code, 404);
|
||||
assert_eq!(response, expected_response);
|
||||
})
|
||||
@@ -44,7 +44,7 @@ async fn similar_feature_not_enabled() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index.similar_post(json!({"id": 287947})).await;
|
||||
let (response, code) = index.similar_post(json!({"id": 287947, "embedder": "manual"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
@@ -79,7 +79,7 @@ async fn similar_bad_id() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_similar_id",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
|
||||
@@ -172,7 +172,7 @@ async fn similar_invalid_id() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_similar_id",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
|
||||
@@ -199,7 +199,8 @@ async fn similar_not_found_id() {
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await;
|
||||
let (response, code) =
|
||||
index.similar_post(json!({"id": "definitely-doesnt-exist", "embedder": "manual"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
@@ -230,7 +231,8 @@ async fn similar_bad_offset() {
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await;
|
||||
let (response, code) =
|
||||
index.similar_post(json!({"id": 287947, "offset": "doggo", "embedder": "manual"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
@@ -241,7 +243,7 @@ async fn similar_bad_offset() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.similar_get("?id=287947&offset=doggo").await;
|
||||
let (response, code) = index.similar_get("?id=287947&offset=doggo&embedder=manual").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
@@ -272,7 +274,8 @@ async fn similar_bad_limit() {
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await;
|
||||
let (response, code) =
|
||||
index.similar_post(json!({"id": 287947, "limit": "doggo", "embedder": "manual"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
@@ -283,7 +286,7 @@ async fn similar_bad_limit() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.similar_get("?id=287946&limit=doggo").await;
|
||||
let (response, code) = index.similar_get("?id=287946&limit=doggo&embedder=manual").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
@@ -323,7 +326,8 @@ async fn similar_bad_filter() {
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await;
|
||||
let (response, code) =
|
||||
index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
@@ -361,7 +365,7 @@ async fn filter_invalid_syntax_object() {
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| {
|
||||
.similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||
@@ -400,7 +404,7 @@ async fn filter_invalid_syntax_array() {
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| {
|
||||
.similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||
@@ -446,7 +450,7 @@ async fn filter_invalid_syntax_string() {
|
||||
});
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}),
|
||||
json!({"id": 287947, "filter": "title = Glass XOR title = Glass", "embedder": "manual"}),
|
||||
|response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
@@ -486,10 +490,13 @@ async fn filter_invalid_attribute_array() {
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||
});
|
||||
index
|
||||
.similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}),
|
||||
|response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -524,10 +531,13 @@ async fn filter_invalid_attribute_string() {
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||
});
|
||||
index
|
||||
.similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}),
|
||||
|response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -562,10 +572,13 @@ async fn filter_reserved_geo_attribute_array() {
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||
});
|
||||
index
|
||||
.similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 287947, "filter": ["_geo = Glass"], "embedder": "manual"}),
|
||||
|response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -600,10 +613,13 @@ async fn filter_reserved_geo_attribute_string() {
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||
});
|
||||
index
|
||||
.similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 287947, "filter": "_geo = Glass", "embedder": "manual"}),
|
||||
|response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -638,10 +654,13 @@ async fn filter_reserved_attribute_array() {
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||
});
|
||||
index
|
||||
.similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 287947, "filter": ["_geoDistance = Glass"], "embedder": "manual"}),
|
||||
|response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -676,10 +695,13 @@ async fn filter_reserved_attribute_string() {
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||
});
|
||||
index
|
||||
.similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 287947, "filter": "_geoDistance = Glass", "embedder": "manual"}),
|
||||
|response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -714,10 +736,13 @@ async fn filter_reserved_geo_point_array() {
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||
});
|
||||
index
|
||||
.similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 287947, "filter": ["_geoPoint = Glass"], "embedder": "manual"}),
|
||||
|response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -752,10 +777,13 @@ async fn filter_reserved_geo_point_string() {
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||
});
|
||||
index
|
||||
.similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 287947, "filter": "_geoPoint = Glass", "embedder": "manual"}),
|
||||
|response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -765,7 +793,8 @@ async fn similar_bad_retrieve_vectors() {
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await;
|
||||
let (response, code) =
|
||||
index.similar_post(json!({"retrieveVectors": "doggo", "embedder": "manual"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
@@ -776,7 +805,8 @@ async fn similar_bad_retrieve_vectors() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await;
|
||||
let (response, code) =
|
||||
index.similar_post(json!({"retrieveVectors": [true], "embedder": "manual"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
|
||||
@@ -80,9 +80,11 @@ async fn basic() {
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 143, "retrieveVectors": true}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
.similar(
|
||||
json!({"id": 143, "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Escape Room",
|
||||
@@ -154,13 +156,16 @@ async fn basic() {
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
.similar(
|
||||
json!({"id": "299537", "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
@@ -232,7 +237,8 @@ async fn basic() {
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -272,7 +278,7 @@ async fn ranking_score_threshold() {
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}),
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
|
||||
@@ -358,7 +364,7 @@ async fn ranking_score_threshold() {
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}),
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
|
||||
@@ -426,7 +432,7 @@ async fn ranking_score_threshold() {
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}),
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
|
||||
@@ -476,7 +482,7 @@ async fn ranking_score_threshold() {
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}),
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
|
||||
@@ -508,7 +514,7 @@ async fn ranking_score_threshold() {
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}),
|
||||
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @"[]");
|
||||
@@ -553,7 +559,7 @@ async fn filter() {
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}),
|
||||
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
@@ -617,7 +623,7 @@ async fn filter() {
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}),
|
||||
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
@@ -681,9 +687,11 @@ async fn limit_and_offset() {
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
.similar(
|
||||
json!({"id": 143, "limit": 1, "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Escape Room",
|
||||
@@ -704,12 +712,13 @@ async fn limit_and_offset() {
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(
|
||||
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}),
|
||||
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true, "embedder": "manual"}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
|
||||
@@ -173,7 +173,7 @@ async fn task_bad_index_uids() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
@@ -184,7 +184,7 @@ async fn task_bad_index_uids() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
@@ -195,7 +195,7 @@ async fn task_bad_index_uids() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
|
||||
380
meilisearch/tests/vector/binary_quantized.rs
Normal file
380
meilisearch/tests/vector/binary_quantized.rs
Normal file
@@ -0,0 +1,380 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
|
||||
use crate::common::{GetAllDocumentsOptions, Server};
|
||||
use crate::json;
|
||||
use crate::vector::generate_default_user_provided_documents;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn retrieve_binary_quantize_status_in_the_settings() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
"binaryQuantized": false,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
"binaryQuantized": true,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn binary_quantize_before_sending_documents() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
"binaryQuantized": true,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await.succeeded();
|
||||
|
||||
// Make sure the documents are binary quantized
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
-1.0,
|
||||
-1.0,
|
||||
1.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
1.0,
|
||||
1.0,
|
||||
-1.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn binary_quantize_after_sending_documents() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
"binaryQuantized": true,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
// Make sure the documents are binary quantized
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
-1.0,
|
||||
-1.0,
|
||||
1.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
1.0,
|
||||
1.0,
|
||||
-1.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn try_to_disable_binary_quantization() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
"binaryQuantized": true,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
"binaryQuantized": false,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let ret = server.wait_task(response.uid()).await;
|
||||
snapshot!(ret, @r###"
|
||||
{
|
||||
"uid": "[uid]",
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "settingsUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
"binaryQuantized": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
|
||||
"code": "invalid_settings_embedders",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn binary_quantize_clear_documents() {
|
||||
let server = Server::new().await;
|
||||
let index = generate_default_user_provided_documents(&server).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"binaryQuantized": true,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (value, _code) = index.clear_all_documents().await;
|
||||
index.wait_task(value.uid()).await.succeeded();
|
||||
|
||||
// Make sure the documents DB has been cleared
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 0
|
||||
}
|
||||
"###);
|
||||
|
||||
// Make sure the arroy DB has been cleared
|
||||
let (documents, _code) =
|
||||
index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await;
|
||||
snapshot!(documents, @r###"
|
||||
{
|
||||
"hits": [],
|
||||
"query": "",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 0,
|
||||
"semanticHitCount": 0
|
||||
}
|
||||
"###);
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
mod binary_quantized;
|
||||
mod openai;
|
||||
mod rest;
|
||||
mod settings;
|
||||
@@ -624,7 +625,8 @@ async fn clear_documents() {
|
||||
"###);
|
||||
|
||||
// Make sure the arroy DB has been cleared
|
||||
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
|
||||
let (documents, _code) =
|
||||
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await;
|
||||
snapshot!(documents, @r###"
|
||||
{
|
||||
"hits": [],
|
||||
@@ -685,7 +687,11 @@ async fn add_remove_one_vector_4588() {
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, name: "document-deleted");
|
||||
|
||||
let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
|
||||
let (documents, _code) = index
|
||||
.search_post(
|
||||
json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }),
|
||||
)
|
||||
.await;
|
||||
snapshot!(documents, @r###"
|
||||
{
|
||||
"hits": [
|
||||
|
||||
@@ -449,7 +449,7 @@ async fn it_works() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "chien de chasse",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -489,7 +489,7 @@ async fn it_works() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "petit chien",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -529,7 +529,7 @@ async fn it_works() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "grand chien de berger des montagnes",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -616,7 +616,7 @@ async fn tokenize_long_text() {
|
||||
"q": "grand chien de berger des montagnes",
|
||||
"showRankingScore": true,
|
||||
"attributesToRetrieve": ["id"],
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1064,7 +1064,7 @@ async fn smaller_dimensions() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "chien de chasse",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1104,7 +1104,7 @@ async fn smaller_dimensions() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "petit chien",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1144,7 +1144,7 @@ async fn smaller_dimensions() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "grand chien de berger des montagnes",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1295,7 +1295,7 @@ async fn small_embedding_model() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "chien de chasse",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1335,7 +1335,7 @@ async fn small_embedding_model() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "petit chien",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1375,7 +1375,7 @@ async fn small_embedding_model() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "grand chien de berger des montagnes",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1525,7 +1525,7 @@ async fn legacy_embedding_model() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "chien de chasse",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1565,7 +1565,7 @@ async fn legacy_embedding_model() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "petit chien",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1605,7 +1605,7 @@ async fn legacy_embedding_model() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "grand chien de berger des montagnes",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1756,7 +1756,7 @@ async fn it_still_works() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "chien de chasse",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1796,7 +1796,7 @@ async fn it_still_works() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "petit chien",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -1836,7 +1836,7 @@ async fn it_still_works() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"q": "grand chien de berger des montagnes",
|
||||
"hybrid": {"semanticRatio": 1.0}
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
|
||||
@@ -218,7 +218,8 @@ async fn reset_embedder_documents() {
|
||||
"###);
|
||||
|
||||
// Make sure the arroy DB has been cleared
|
||||
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
|
||||
let (documents, _code) =
|
||||
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"message": "Cannot find embedder with name `default`.",
|
||||
|
||||
@@ -17,7 +17,7 @@ bincode = "1.3.3"
|
||||
bstr = "1.9.1"
|
||||
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.5.0"
|
||||
charabia = { version = "0.9.0", default-features = false }
|
||||
charabia = { version = "0.9.1", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.13"
|
||||
deserr = "0.6.2"
|
||||
@@ -80,7 +80,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
|
||||
tiktoken-rs = "0.5.9"
|
||||
liquid = "0.26.6"
|
||||
rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
|
||||
arroy = "0.4.0"
|
||||
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.40"
|
||||
ureq = { version = "2.10.0", features = ["json"] }
|
||||
@@ -106,6 +106,8 @@ all-tokenizations = [
|
||||
"charabia/greek",
|
||||
"charabia/khmer",
|
||||
"charabia/vietnamese",
|
||||
"charabia/swedish-recomposition",
|
||||
"charabia/german-segmentation",
|
||||
]
|
||||
|
||||
# Use POSIX semaphores instead of SysV semaphores in LMDB
|
||||
@@ -138,6 +140,9 @@ khmer = ["charabia/khmer"]
|
||||
# allow vietnamese specialized tokenization
|
||||
vietnamese = ["charabia/vietnamese"]
|
||||
|
||||
# allow german specialized tokenization
|
||||
german = ["charabia/german-segmentation"]
|
||||
|
||||
# force swedish character recomposition
|
||||
swedish-recomposition = ["charabia/swedish-recomposition"]
|
||||
|
||||
|
||||
@@ -150,12 +150,13 @@ fn starts_with(selector: &str, key: &str) -> bool {
|
||||
// FIXME: move to a DocumentId struct
|
||||
|
||||
fn validate_document_id(document_id: &str) -> Option<&str> {
|
||||
if !document_id.is_empty()
|
||||
&& document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
|
||||
if document_id.is_empty()
|
||||
|| document_id.len() > 512
|
||||
|| !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
|
||||
{
|
||||
Some(document_id)
|
||||
} else {
|
||||
None
|
||||
} else {
|
||||
Some(document_id)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,6 +167,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserE
|
||||
Some(s) => Ok(s.to_string()),
|
||||
None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
|
||||
},
|
||||
// a `u64` or `i64` cannot be more than 512 bytes once converted to a string
|
||||
Value::Number(number) if !number.is_f64() => Ok(number.to_string()),
|
||||
content => Err(UserError::InvalidDocumentId { document_id: content }),
|
||||
}
|
||||
|
||||
@@ -106,7 +106,8 @@ pub enum UserError {
|
||||
#[error(
|
||||
"Document identifier `{}` is invalid. \
|
||||
A document identifier can be of type integer or string, \
|
||||
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", .document_id.to_string()
|
||||
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
|
||||
and can not be more than 512 bytes.", .document_id.to_string()
|
||||
)]
|
||||
InvalidDocumentId { document_id: Value },
|
||||
#[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]
|
||||
@@ -258,6 +259,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
},
|
||||
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
|
||||
InvalidSettingsDimensions { embedder_name: String },
|
||||
#[error(
|
||||
"`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors."
|
||||
)]
|
||||
InvalidDisableBinaryQuantization { embedder_name: String },
|
||||
#[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")]
|
||||
InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String },
|
||||
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
|
||||
|
||||
@@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
|
||||
use crate::order_by_map::OrderByMap;
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
|
||||
use crate::vector::{Embedding, EmbeddingConfig};
|
||||
use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
|
||||
use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
|
||||
@@ -162,7 +162,7 @@ pub struct Index {
|
||||
/// Maps an embedder name to its id in the arroy store.
|
||||
pub embedder_category_id: Database<Str, U8>,
|
||||
/// Vector store based on arroy™.
|
||||
pub vector_arroy: arroy::Database<arroy::distances::Angular>,
|
||||
pub vector_arroy: arroy::Database<Unspecified>,
|
||||
|
||||
/// Maps the document id to the document as an obkv store.
|
||||
pub(crate) documents: Database<BEU32, ObkvCodec>,
|
||||
@@ -1614,15 +1614,17 @@ impl Index {
|
||||
&'a self,
|
||||
rtxn: &'a RoTxn<'a>,
|
||||
embedder_id: u8,
|
||||
) -> impl Iterator<Item = Result<arroy::Reader<'a, arroy::distances::Angular>>> + 'a {
|
||||
quantized: bool,
|
||||
) -> impl Iterator<Item = Result<ArroyWrapper>> + 'a {
|
||||
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
|
||||
arroy::Reader::open(rtxn, k, self.vector_arroy)
|
||||
.map(Some)
|
||||
.or_else(|e| match e {
|
||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||
e => Err(e.into()),
|
||||
})
|
||||
.transpose()
|
||||
let reader = ArroyWrapper::new(self.vector_arroy, k, quantized);
|
||||
// Here we don't care about the dimensions, but we want to know if we can read
|
||||
// in the database or if its metadata are missing because there is no document with that many vectors.
|
||||
match reader.dimensions(rtxn) {
|
||||
Ok(_) => Some(Ok(reader)),
|
||||
Err(arroy::Error::MissingMetadata(_)) => None,
|
||||
Err(e) => Some(Err(e.into())),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1644,32 +1646,18 @@ impl Index {
|
||||
docid: DocumentId,
|
||||
) -> Result<BTreeMap<String, Vec<Embedding>>> {
|
||||
let mut res = BTreeMap::new();
|
||||
for row in self.embedder_category_id.iter(rtxn)? {
|
||||
let (embedder_name, embedder_id) = row?;
|
||||
let embedder_id = (embedder_id as u16) << 8;
|
||||
let mut embeddings = Vec::new();
|
||||
'vectors: for i in 0..=u8::MAX {
|
||||
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
|
||||
.map(Some)
|
||||
.or_else(|e| match e {
|
||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||
e => Err(e),
|
||||
})
|
||||
.transpose();
|
||||
|
||||
let Some(reader) = reader else {
|
||||
break 'vectors;
|
||||
};
|
||||
|
||||
let embedding = reader?.item_vector(rtxn, docid)?;
|
||||
if let Some(embedding) = embedding {
|
||||
embeddings.push(embedding)
|
||||
} else {
|
||||
break 'vectors;
|
||||
}
|
||||
}
|
||||
|
||||
res.insert(embedder_name.to_owned(), embeddings);
|
||||
let embedding_configs = self.embedding_configs(rtxn)?;
|
||||
for config in embedding_configs {
|
||||
let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
|
||||
let embeddings = self
|
||||
.arroy_readers(rtxn, embedder_id, config.config.quantized())
|
||||
.map_while(|reader| {
|
||||
reader
|
||||
.and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into()))
|
||||
.transpose()
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
res.insert(config.name.to_owned(), embeddings);
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
@@ -190,7 +190,7 @@ impl<'a> Search<'a> {
|
||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||
};
|
||||
// no embedder, no semantic search
|
||||
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
|
||||
let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else {
|
||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||
};
|
||||
|
||||
@@ -212,7 +212,7 @@ impl<'a> Search<'a> {
|
||||
};
|
||||
|
||||
search.semantic =
|
||||
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder });
|
||||
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized });
|
||||
|
||||
// TODO: would be better to have two distinct functions at this point
|
||||
let vector_results = search.execute()?;
|
||||
|
||||
@@ -32,6 +32,7 @@ pub struct SemanticSearch {
|
||||
vector: Option<Vec<f32>>,
|
||||
embedder_name: String,
|
||||
embedder: Arc<Embedder>,
|
||||
quantized: bool,
|
||||
}
|
||||
|
||||
pub struct Search<'a> {
|
||||
@@ -89,9 +90,10 @@ impl<'a> Search<'a> {
|
||||
&mut self,
|
||||
embedder_name: String,
|
||||
embedder: Arc<Embedder>,
|
||||
quantized: bool,
|
||||
vector: Option<Vec<f32>>,
|
||||
) -> &mut Search<'a> {
|
||||
self.semantic = Some(SemanticSearch { embedder_name, embedder, vector });
|
||||
self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector });
|
||||
self
|
||||
}
|
||||
|
||||
@@ -206,7 +208,7 @@ impl<'a> Search<'a> {
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
} = match self.semantic.as_ref() {
|
||||
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder }) => {
|
||||
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => {
|
||||
execute_vector_search(
|
||||
&mut ctx,
|
||||
vector,
|
||||
@@ -219,6 +221,7 @@ impl<'a> Search<'a> {
|
||||
self.limit,
|
||||
embedder_name,
|
||||
embedder,
|
||||
*quantized,
|
||||
self.time_budget.clone(),
|
||||
self.ranking_score_threshold,
|
||||
)?
|
||||
|
||||
@@ -312,6 +312,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||
Ok(ranking_rules)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn get_ranking_rules_for_vector<'ctx>(
|
||||
ctx: &SearchContext<'ctx>,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
@@ -320,6 +321,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
||||
target: &[f32],
|
||||
embedder_name: &str,
|
||||
embedder: &Embedder,
|
||||
quantized: bool,
|
||||
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
||||
// query graph search
|
||||
|
||||
@@ -347,6 +349,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
||||
limit_plus_offset,
|
||||
embedder_name,
|
||||
embedder,
|
||||
quantized,
|
||||
)?;
|
||||
ranking_rules.push(Box::new(vector_sort));
|
||||
vector = true;
|
||||
@@ -576,6 +579,7 @@ pub fn execute_vector_search(
|
||||
length: usize,
|
||||
embedder_name: &str,
|
||||
embedder: &Embedder,
|
||||
quantized: bool,
|
||||
time_budget: TimeBudget,
|
||||
ranking_score_threshold: Option<f64>,
|
||||
) -> Result<PartialSearchResult> {
|
||||
@@ -591,6 +595,7 @@ pub fn execute_vector_search(
|
||||
vector,
|
||||
embedder_name,
|
||||
embedder,
|
||||
quantized,
|
||||
)?;
|
||||
|
||||
let mut placeholder_search_logger = logger::DefaultSearchLogger;
|
||||
|
||||
@@ -16,6 +16,7 @@ pub struct VectorSort<Q: RankingRuleQueryTrait> {
|
||||
limit: usize,
|
||||
distribution_shift: Option<DistributionShift>,
|
||||
embedder_index: u8,
|
||||
quantized: bool,
|
||||
}
|
||||
|
||||
impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
@@ -26,6 +27,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
limit: usize,
|
||||
embedder_name: &str,
|
||||
embedder: &Embedder,
|
||||
quantized: bool,
|
||||
) -> Result<Self> {
|
||||
let embedder_index = ctx
|
||||
.index
|
||||
@@ -41,6 +43,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
limit,
|
||||
distribution_shift: embedder.distribution(),
|
||||
embedder_index,
|
||||
quantized,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -49,16 +52,12 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
ctx: &mut SearchContext<'_>,
|
||||
vector_candidates: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
let readers: std::result::Result<Vec<_>, _> =
|
||||
ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect();
|
||||
let readers = readers?;
|
||||
|
||||
let target = &self.target;
|
||||
let mut results = Vec::new();
|
||||
|
||||
for reader in readers.iter() {
|
||||
for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) {
|
||||
let nns_by_vector =
|
||||
reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?;
|
||||
reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
|
||||
results.extend(nns_by_vector.into_iter());
|
||||
}
|
||||
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
||||
|
||||
@@ -18,9 +18,11 @@ pub struct Similar<'a> {
|
||||
embedder_name: String,
|
||||
embedder: Arc<Embedder>,
|
||||
ranking_score_threshold: Option<f64>,
|
||||
quantized: bool,
|
||||
}
|
||||
|
||||
impl<'a> Similar<'a> {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
id: DocumentId,
|
||||
offset: usize,
|
||||
@@ -29,6 +31,7 @@ impl<'a> Similar<'a> {
|
||||
rtxn: &'a heed::RoTxn<'a>,
|
||||
embedder_name: String,
|
||||
embedder: Arc<Embedder>,
|
||||
quantized: bool,
|
||||
) -> Self {
|
||||
Self {
|
||||
id,
|
||||
@@ -40,6 +43,7 @@ impl<'a> Similar<'a> {
|
||||
embedder_name,
|
||||
embedder,
|
||||
ranking_score_threshold: None,
|
||||
quantized,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,19 +71,13 @@ impl<'a> Similar<'a> {
|
||||
.get(self.rtxn, &self.embedder_name)?
|
||||
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;
|
||||
|
||||
let readers: std::result::Result<Vec<_>, _> =
|
||||
self.index.arroy_readers(self.rtxn, embedder_index).collect();
|
||||
|
||||
let readers = readers?;
|
||||
|
||||
let mut results = Vec::new();
|
||||
|
||||
for reader in readers.iter() {
|
||||
let nns_by_item = reader.nns_by_item(
|
||||
for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) {
|
||||
let nns_by_item = reader?.nns_by_item(
|
||||
self.rtxn,
|
||||
self.id,
|
||||
self.limit + self.offset + 1,
|
||||
None,
|
||||
Some(&universe),
|
||||
)?;
|
||||
if let Some(mut nns_by_item) = nns_by_item {
|
||||
|
||||
@@ -20,7 +20,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
|
||||
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::vector::settings::{EmbedderAction, ReindexAction};
|
||||
use crate::vector::settings::ReindexAction;
|
||||
use crate::vector::{Embedder, Embeddings};
|
||||
use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};
|
||||
|
||||
@@ -208,65 +208,65 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
|
||||
if reindex_vectors {
|
||||
for (name, action) in settings_diff.embedding_config_updates.iter() {
|
||||
match action {
|
||||
EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted
|
||||
EmbedderAction::Reindex(action) => {
|
||||
let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name)
|
||||
else {
|
||||
tracing::error!(embedder = name, "Requested embedder config not found");
|
||||
continue;
|
||||
};
|
||||
if let Some(action) = action.reindex() {
|
||||
let Some((embedder_name, (embedder, prompt, _quantized))) =
|
||||
configs.remove_entry(name)
|
||||
else {
|
||||
tracing::error!(embedder = name, "Requested embedder config not found");
|
||||
continue;
|
||||
};
|
||||
|
||||
// (docid, _index) -> KvWriterDelAdd -> Vector
|
||||
let manual_vectors_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
// (docid, _index) -> KvWriterDelAdd -> Vector
|
||||
let manual_vectors_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
|
||||
// (docid) -> (prompt)
|
||||
let prompts_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
// (docid) -> (prompt)
|
||||
let prompts_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
|
||||
// (docid) -> ()
|
||||
let remove_vectors_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
// (docid) -> ()
|
||||
let remove_vectors_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
|
||||
let action = match action {
|
||||
ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
|
||||
ReindexAction::RegeneratePrompts => {
|
||||
let Some((_, old_prompt)) = old_configs.get(name) else {
|
||||
tracing::error!(embedder = name, "Old embedder config not found");
|
||||
continue;
|
||||
};
|
||||
let action = match action {
|
||||
ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
|
||||
ReindexAction::RegeneratePrompts => {
|
||||
let Some((_, old_prompt, _quantized)) = old_configs.get(name) else {
|
||||
tracing::error!(embedder = name, "Old embedder config not found");
|
||||
continue;
|
||||
};
|
||||
|
||||
ExtractionAction::SettingsRegeneratePrompts { old_prompt }
|
||||
}
|
||||
};
|
||||
ExtractionAction::SettingsRegeneratePrompts { old_prompt }
|
||||
}
|
||||
};
|
||||
|
||||
extractors.push(EmbedderVectorExtractor {
|
||||
embedder_name,
|
||||
embedder,
|
||||
prompt,
|
||||
prompts_writer,
|
||||
remove_vectors_writer,
|
||||
manual_vectors_writer,
|
||||
add_to_user_provided: RoaringBitmap::new(),
|
||||
action,
|
||||
});
|
||||
}
|
||||
extractors.push(EmbedderVectorExtractor {
|
||||
embedder_name,
|
||||
embedder,
|
||||
prompt,
|
||||
prompts_writer,
|
||||
remove_vectors_writer,
|
||||
manual_vectors_writer,
|
||||
add_to_user_provided: RoaringBitmap::new(),
|
||||
action,
|
||||
});
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// document operation
|
||||
|
||||
for (embedder_name, (embedder, prompt)) in configs.into_iter() {
|
||||
for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() {
|
||||
// (docid, _index) -> KvWriterDelAdd -> Vector
|
||||
let manual_vectors_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
|
||||
@@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs;
|
||||
use crate::update::{
|
||||
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
||||
};
|
||||
use crate::vector::EmbeddingConfigs;
|
||||
use crate::vector::{ArroyWrapper, EmbeddingConfigs};
|
||||
use crate::{CboRoaringBitmapCodec, Index, Object, Result};
|
||||
|
||||
static MERGED_DATABASE_COUNT: usize = 7;
|
||||
@@ -679,6 +679,24 @@ where
|
||||
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||
|
||||
// If an embedder wasn't used in the typedchunk but must be binary quantized
|
||||
// we should insert it in `dimension`
|
||||
for (name, action) in settings_diff.embedding_config_updates.iter() {
|
||||
if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
|
||||
let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or(
|
||||
InternalError::DatabaseMissingEntry {
|
||||
db_name: "embedder_category_id",
|
||||
key: None,
|
||||
},
|
||||
)?;
|
||||
let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap();
|
||||
let reader =
|
||||
ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized);
|
||||
let dim = reader.dimensions(self.wtxn)?;
|
||||
dimension.insert(name.to_string(), dim);
|
||||
}
|
||||
}
|
||||
|
||||
for (embedder_name, dimension) in dimension {
|
||||
let wtxn = &mut *self.wtxn;
|
||||
let vector_arroy = self.index.vector_arroy;
|
||||
@@ -686,13 +704,23 @@ where
|
||||
let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
|
||||
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
|
||||
)?;
|
||||
let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name);
|
||||
let was_quantized = settings_diff
|
||||
.old
|
||||
.embedding_configs
|
||||
.get(&embedder_name)
|
||||
.map_or(false, |conf| conf.2);
|
||||
let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized);
|
||||
|
||||
pool.install(|| {
|
||||
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
|
||||
let writer = arroy::Writer::new(vector_arroy, k, dimension);
|
||||
if writer.need_build(wtxn)? {
|
||||
writer.build(wtxn, &mut rng, None)?;
|
||||
} else if writer.is_empty(wtxn)? {
|
||||
let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized);
|
||||
if is_quantizing {
|
||||
writer.quantize(wtxn, k, dimension)?;
|
||||
}
|
||||
if writer.need_build(wtxn, dimension)? {
|
||||
writer.build(wtxn, &mut rng, dimension)?;
|
||||
} else if writer.is_empty(wtxn, dimension)? {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -2746,6 +2774,7 @@ mod tests {
|
||||
response: Setting::NotSet,
|
||||
distribution: Setting::NotSet,
|
||||
headers: Setting::NotSet,
|
||||
binary_quantized: Setting::NotSet,
|
||||
}),
|
||||
);
|
||||
settings.set_embedder_settings(embedders);
|
||||
@@ -2774,7 +2803,7 @@ mod tests {
|
||||
std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
|
||||
let res = index
|
||||
.search(&rtxn)
|
||||
.semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec()))
|
||||
.semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))
|
||||
.execute()
|
||||
.unwrap();
|
||||
assert_eq!(res.documents_ids.len(), 3);
|
||||
|
||||
@@ -28,7 +28,8 @@ use crate::update::index_documents::GrenadParameters;
|
||||
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
|
||||
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
||||
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use crate::vector::settings::{EmbedderAction, WriteBackToDocuments};
|
||||
use crate::vector::settings::WriteBackToDocuments;
|
||||
use crate::vector::ArroyWrapper;
|
||||
use crate::{
|
||||
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
||||
};
|
||||
@@ -989,19 +990,17 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
None
|
||||
};
|
||||
|
||||
let readers: Result<
|
||||
BTreeMap<&str, (Vec<arroy::Reader<'_, arroy::distances::Angular>>, &RoaringBitmap)>,
|
||||
> = settings_diff
|
||||
let readers: Result<BTreeMap<&str, (Vec<ArroyWrapper>, &RoaringBitmap)>> = settings_diff
|
||||
.embedding_config_updates
|
||||
.iter()
|
||||
.filter_map(|(name, action)| {
|
||||
if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
|
||||
embedder_id,
|
||||
user_provided,
|
||||
}) = action
|
||||
if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
|
||||
action.write_back()
|
||||
{
|
||||
let readers: Result<Vec<_>> =
|
||||
self.index.arroy_readers(wtxn, *embedder_id).collect();
|
||||
let readers: Result<Vec<_>> = self
|
||||
.index
|
||||
.arroy_readers(wtxn, *embedder_id, action.was_quantized)
|
||||
.collect();
|
||||
match readers {
|
||||
Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))),
|
||||
Err(error) => Some(Err(error)),
|
||||
@@ -1104,23 +1103,14 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
let mut writers = Vec::new();
|
||||
|
||||
// delete all vectors from the embedders that need removal
|
||||
for (_, (readers, _)) in readers {
|
||||
for reader in readers {
|
||||
let dimensions = reader.dimensions();
|
||||
let arroy_index = reader.index();
|
||||
drop(reader);
|
||||
let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions);
|
||||
writers.push(writer);
|
||||
let dimensions = reader.dimensions(wtxn)?;
|
||||
reader.clear(wtxn, dimensions)?;
|
||||
}
|
||||
}
|
||||
|
||||
for writer in writers {
|
||||
writer.clear(wtxn)?;
|
||||
}
|
||||
|
||||
let grenad_params = GrenadParameters {
|
||||
chunk_compression_type: self.indexer_settings.chunk_compression_type,
|
||||
chunk_compression_level: self.indexer_settings.chunk_compression_level,
|
||||
|
||||
@@ -27,6 +27,7 @@ use crate::update::index_documents::helpers::{
|
||||
as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
|
||||
};
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::ArroyWrapper;
|
||||
use crate::{
|
||||
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
|
||||
Result, SerializationError, U8StrStrCodec,
|
||||
@@ -666,9 +667,14 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
|
||||
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
|
||||
)?;
|
||||
let binary_quantized = settings_diff
|
||||
.old
|
||||
.embedding_configs
|
||||
.get(&embedder_name)
|
||||
.map_or(false, |conf| conf.2);
|
||||
// FIXME: allow customizing distance
|
||||
let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
|
||||
.map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension))
|
||||
.map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized))
|
||||
.collect();
|
||||
|
||||
// remove vectors for docids we want them removed
|
||||
@@ -679,7 +685,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
|
||||
for writer in &writers {
|
||||
// Uses invariant: vectors are packed in the first writers.
|
||||
if !writer.del_item(wtxn, docid)? {
|
||||
if !writer.del_item(wtxn, expected_dimension, docid)? {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -711,7 +717,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
)));
|
||||
}
|
||||
for (embedding, writer) in embeddings.iter().zip(&writers) {
|
||||
writer.add_item(wtxn, docid, embedding)?;
|
||||
writer.add_item(wtxn, expected_dimension, docid, embedding)?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -734,7 +740,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
break;
|
||||
};
|
||||
if candidate == vector {
|
||||
writer.del_item(wtxn, docid)?;
|
||||
writer.del_item(wtxn, expected_dimension, docid)?;
|
||||
deleted_index = Some(index);
|
||||
}
|
||||
}
|
||||
@@ -751,8 +757,13 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
if let Some((last_index, vector)) = last_index_with_a_vector {
|
||||
// unwrap: computed the index from the list of writers
|
||||
let writer = writers.get(last_index).unwrap();
|
||||
writer.del_item(wtxn, docid)?;
|
||||
writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?;
|
||||
writer.del_item(wtxn, expected_dimension, docid)?;
|
||||
writers.get(deleted_index).unwrap().add_item(
|
||||
wtxn,
|
||||
expected_dimension,
|
||||
docid,
|
||||
&vector,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -762,8 +773,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
|
||||
// overflow was detected during vector extraction.
|
||||
for writer in &writers {
|
||||
if !writer.contains_item(wtxn, docid)? {
|
||||
writer.add_item(wtxn, docid, &vector)?;
|
||||
if !writer.contains_item(wtxn, expected_dimension, docid)? {
|
||||
writer.add_item(wtxn, expected_dimension, docid, &vector)?;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -954,7 +954,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
let old_configs = self.index.embedding_configs(self.wtxn)?;
|
||||
let remove_all: Result<BTreeMap<String, EmbedderAction>> = old_configs
|
||||
.into_iter()
|
||||
.map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> {
|
||||
.map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> {
|
||||
let embedder_id =
|
||||
self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or(
|
||||
crate::InternalError::DatabaseMissingEntry {
|
||||
@@ -964,10 +964,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
)?;
|
||||
Ok((
|
||||
name,
|
||||
EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
|
||||
embedder_id,
|
||||
user_provided,
|
||||
}),
|
||||
EmbedderAction::with_write_back(
|
||||
WriteBackToDocuments { embedder_id, user_provided },
|
||||
config.quantized(),
|
||||
),
|
||||
))
|
||||
})
|
||||
.collect();
|
||||
@@ -1004,7 +1004,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
match joined {
|
||||
// updated config
|
||||
EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => {
|
||||
let settings_diff = SettingsDiff::from_settings(old, new);
|
||||
let was_quantized = old.binary_quantized.set().unwrap_or_default();
|
||||
let settings_diff = SettingsDiff::from_settings(&name, old, new)?;
|
||||
match settings_diff {
|
||||
SettingsDiff::Remove => {
|
||||
tracing::debug!(
|
||||
@@ -1023,25 +1024,29 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
self.index.embedder_category_id.delete(self.wtxn, &name)?;
|
||||
embedder_actions.insert(
|
||||
name,
|
||||
EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
|
||||
embedder_id,
|
||||
user_provided,
|
||||
}),
|
||||
EmbedderAction::with_write_back(
|
||||
WriteBackToDocuments { embedder_id, user_provided },
|
||||
was_quantized,
|
||||
),
|
||||
);
|
||||
}
|
||||
SettingsDiff::Reindex { action, updated_settings } => {
|
||||
SettingsDiff::Reindex { action, updated_settings, quantize } => {
|
||||
tracing::debug!(
|
||||
embedder = name,
|
||||
user_provided = user_provided.len(),
|
||||
?action,
|
||||
"reindex embedder"
|
||||
);
|
||||
embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action));
|
||||
embedder_actions.insert(
|
||||
name.clone(),
|
||||
EmbedderAction::with_reindex(action, was_quantized)
|
||||
.with_is_being_quantized(quantize),
|
||||
);
|
||||
let new =
|
||||
validate_embedding_settings(Setting::Set(updated_settings), &name)?;
|
||||
updated_configs.insert(name, (new, user_provided));
|
||||
}
|
||||
SettingsDiff::UpdateWithoutReindex { updated_settings } => {
|
||||
SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => {
|
||||
tracing::debug!(
|
||||
embedder = name,
|
||||
user_provided = user_provided.len(),
|
||||
@@ -1049,6 +1054,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
);
|
||||
let new =
|
||||
validate_embedding_settings(Setting::Set(updated_settings), &name)?;
|
||||
if quantize {
|
||||
embedder_actions.insert(
|
||||
name.clone(),
|
||||
EmbedderAction::default().with_is_being_quantized(true),
|
||||
);
|
||||
}
|
||||
updated_configs.insert(name, (new, user_provided));
|
||||
}
|
||||
}
|
||||
@@ -1067,8 +1078,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
&mut setting,
|
||||
);
|
||||
let setting = validate_embedding_settings(setting, &name)?;
|
||||
embedder_actions
|
||||
.insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex));
|
||||
embedder_actions.insert(
|
||||
name.clone(),
|
||||
EmbedderAction::with_reindex(ReindexAction::FullReindex, false),
|
||||
);
|
||||
updated_configs.insert(name, (setting, RoaringBitmap::new()));
|
||||
}
|
||||
}
|
||||
@@ -1082,19 +1095,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
let mut find_free_index =
|
||||
move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8);
|
||||
for (name, action) in embedder_actions.iter() {
|
||||
match action {
|
||||
EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => {
|
||||
/* cannot be a new embedder, so has to have an id already */
|
||||
}
|
||||
EmbedderAction::Reindex(ReindexAction::FullReindex) => {
|
||||
if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() {
|
||||
let id = find_free_index()
|
||||
.ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
|
||||
tracing::debug!(embedder = name, id, "assigning free id to new embedder");
|
||||
self.index.embedder_category_id.put(self.wtxn, name, &id)?;
|
||||
}
|
||||
}
|
||||
EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ }
|
||||
// ignore actions that are not possible for a new embedder
|
||||
if matches!(action.reindex(), Some(ReindexAction::FullReindex))
|
||||
&& self.index.embedder_category_id.get(self.wtxn, name)?.is_none()
|
||||
{
|
||||
let id =
|
||||
find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
|
||||
tracing::debug!(embedder = name, id, "assigning free id to new embedder");
|
||||
self.index.embedder_category_id.put(self.wtxn, name, &id)?;
|
||||
}
|
||||
}
|
||||
let updated_configs: Vec<IndexEmbeddingConfig> = updated_configs
|
||||
@@ -1277,7 +1285,11 @@ impl InnerIndexSettingsDiff {
|
||||
|
||||
// if the user-defined searchables changed, then we need to reindex prompts.
|
||||
if cache_user_defined_searchables {
|
||||
for (embedder_name, (config, _)) in new_settings.embedding_configs.inner_as_ref() {
|
||||
for (embedder_name, (config, _, _quantized)) in
|
||||
new_settings.embedding_configs.inner_as_ref()
|
||||
{
|
||||
let was_quantized =
|
||||
old_settings.embedding_configs.get(embedder_name).map_or(false, |conf| conf.2);
|
||||
// skip embedders that don't use document templates
|
||||
if !config.uses_document_template() {
|
||||
continue;
|
||||
@@ -1287,16 +1299,19 @@ impl InnerIndexSettingsDiff {
|
||||
// this always makes the code clearer by explicitly handling the cases
|
||||
match embedding_config_updates.entry(embedder_name.clone()) {
|
||||
std::collections::btree_map::Entry::Vacant(entry) => {
|
||||
entry.insert(EmbedderAction::Reindex(ReindexAction::RegeneratePrompts));
|
||||
entry.insert(EmbedderAction::with_reindex(
|
||||
ReindexAction::RegeneratePrompts,
|
||||
was_quantized,
|
||||
));
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(entry) => {
|
||||
let EmbedderAction {
|
||||
was_quantized: _,
|
||||
is_being_quantized: _,
|
||||
write_back: _, // We are deleting this embedder, so no point in regeneration
|
||||
reindex: _, // We are already fully reindexing
|
||||
} = entry.get();
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(entry) => match entry.get() {
|
||||
EmbedderAction::WriteBackToDocuments(_) => { /* we are deleting this embedder, so no point in regeneration */
|
||||
}
|
||||
EmbedderAction::Reindex(ReindexAction::FullReindex) => { /* we are already fully reindexing */
|
||||
}
|
||||
EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { /* we are already regenerating prompts */
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -1546,7 +1561,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
|
||||
.map(
|
||||
|IndexEmbeddingConfig {
|
||||
name,
|
||||
config: EmbeddingConfig { embedder_options, prompt },
|
||||
config: EmbeddingConfig { embedder_options, prompt, quantized },
|
||||
..
|
||||
}| {
|
||||
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
|
||||
@@ -1556,7 +1571,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
|
||||
.map_err(crate::vector::Error::from)
|
||||
.map_err(crate::Error::from)?,
|
||||
);
|
||||
Ok((name, (embedder, prompt)))
|
||||
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
@@ -1581,6 +1596,7 @@ fn validate_prompt(
|
||||
response,
|
||||
distribution,
|
||||
headers,
|
||||
binary_quantized: binary_quantize,
|
||||
}) => {
|
||||
let max_bytes = match document_template_max_bytes.set() {
|
||||
Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
|
||||
@@ -1613,6 +1629,7 @@ fn validate_prompt(
|
||||
response,
|
||||
distribution,
|
||||
headers,
|
||||
binary_quantized: binary_quantize,
|
||||
}))
|
||||
}
|
||||
new => Ok(new),
|
||||
@@ -1638,6 +1655,7 @@ pub fn validate_embedding_settings(
|
||||
response,
|
||||
distribution,
|
||||
headers,
|
||||
binary_quantized: binary_quantize,
|
||||
} = settings;
|
||||
|
||||
if let Some(0) = dimensions.set() {
|
||||
@@ -1678,6 +1696,7 @@ pub fn validate_embedding_settings(
|
||||
response,
|
||||
distribution,
|
||||
headers,
|
||||
binary_quantized: binary_quantize,
|
||||
}));
|
||||
};
|
||||
match inferred_source {
|
||||
@@ -1779,6 +1798,7 @@ pub fn validate_embedding_settings(
|
||||
response,
|
||||
distribution,
|
||||
headers,
|
||||
binary_quantized: binary_quantize,
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arroy::distances::{Angular, BinaryQuantizedAngular};
|
||||
use arroy::ItemId;
|
||||
use deserr::{DeserializeError, Deserr};
|
||||
use heed::{RoTxn, RwTxn, Unspecified};
|
||||
use ordered_float::OrderedFloat;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use self::error::{EmbedError, NewEmbedderError};
|
||||
@@ -26,6 +30,171 @@ pub type Embedding = Vec<f32>;
|
||||
|
||||
pub const REQUEST_PARALLELISM: usize = 40;
|
||||
|
||||
pub struct ArroyWrapper {
|
||||
quantized: bool,
|
||||
index: u16,
|
||||
database: arroy::Database<Unspecified>,
|
||||
}
|
||||
|
||||
impl ArroyWrapper {
|
||||
pub fn new(database: arroy::Database<Unspecified>, index: u16, quantized: bool) -> Self {
|
||||
Self { database, index, quantized }
|
||||
}
|
||||
|
||||
pub fn index(&self) -> u16 {
|
||||
self.index
|
||||
}
|
||||
|
||||
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> {
|
||||
if self.quantized {
|
||||
Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions())
|
||||
} else {
|
||||
Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn quantize(
|
||||
&mut self,
|
||||
wtxn: &mut RwTxn,
|
||||
index: u16,
|
||||
dimension: usize,
|
||||
) -> Result<(), arroy::Error> {
|
||||
if !self.quantized {
|
||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
||||
writer.prepare_changing_distance::<BinaryQuantizedAngular>(wtxn)?;
|
||||
self.quantized = true;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build<R: rand::Rng + rand::SeedableRng>(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
rng: &mut R,
|
||||
dimension: usize,
|
||||
) -> Result<(), arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_item(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
dimension: usize,
|
||||
item_id: arroy::ItemId,
|
||||
vector: &[f32],
|
||||
) -> Result<(), arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension)
|
||||
.add_item(wtxn, item_id, vector)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension)
|
||||
.add_item(wtxn, item_id, vector)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn del_item(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
dimension: usize,
|
||||
item_id: arroy::ItemId,
|
||||
) -> Result<bool, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn contains_item(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
dimension: usize,
|
||||
item: arroy::ItemId,
|
||||
) -> Result<bool, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, item)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn nns_by_item(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
item: ItemId,
|
||||
limit: usize,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
) -> Result<Option<Vec<(ItemId, f32)>>, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Reader::open(rtxn, self.index, self.quantized_db())?
|
||||
.nns_by_item(rtxn, item, limit, None, None, filter)
|
||||
} else {
|
||||
arroy::Reader::open(rtxn, self.index, self.angular_db())?
|
||||
.nns_by_item(rtxn, item, limit, None, None, filter)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn nns_by_vector(
|
||||
&self,
|
||||
txn: &RoTxn,
|
||||
item: &[f32],
|
||||
limit: usize,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Reader::open(txn, self.index, self.quantized_db())?
|
||||
.nns_by_vector(txn, item, limit, None, None, filter)
|
||||
} else {
|
||||
arroy::Reader::open(txn, self.index, self.angular_db())?
|
||||
.nns_by_vector(txn, item, limit, None, None, filter)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result<Option<Vec<f32>>, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid)
|
||||
} else {
|
||||
arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid)
|
||||
}
|
||||
}
|
||||
|
||||
fn angular_db(&self) -> arroy::Database<Angular> {
|
||||
self.database.remap_data_type()
|
||||
}
|
||||
|
||||
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedAngular> {
|
||||
self.database.remap_data_type()
|
||||
}
|
||||
}
|
||||
|
||||
/// One or multiple embeddings stored consecutively in a flat vector.
|
||||
pub struct Embeddings<F> {
|
||||
data: Vec<F>,
|
||||
@@ -124,62 +293,48 @@ pub struct EmbeddingConfig {
|
||||
pub embedder_options: EmbedderOptions,
|
||||
/// Document template
|
||||
pub prompt: PromptData,
|
||||
/// If this embedder is binary quantized
|
||||
pub quantized: Option<bool>,
|
||||
// TODO: add metrics and anything needed
|
||||
}
|
||||
|
||||
impl EmbeddingConfig {
|
||||
pub fn quantized(&self) -> bool {
|
||||
self.quantized.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Map of embedder configurations.
|
||||
///
|
||||
/// Each configuration is mapped to a name.
|
||||
#[derive(Clone, Default)]
|
||||
pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>)>);
|
||||
pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>);
|
||||
|
||||
impl EmbeddingConfigs {
|
||||
/// Create the map from its internal component.s
|
||||
pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>)>) -> Self {
|
||||
pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>) -> Self {
|
||||
Self(data)
|
||||
}
|
||||
|
||||
/// Get an embedder configuration and template from its name.
|
||||
pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
|
||||
pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>, bool)> {
|
||||
self.0.get(name).cloned()
|
||||
}
|
||||
|
||||
/// Get the default embedder configuration, if any.
|
||||
pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
|
||||
self.get(self.get_default_embedder_name())
|
||||
}
|
||||
|
||||
pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
|
||||
pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
|
||||
&self.0
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
|
||||
pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Get the name of the default embedder configuration.
|
||||
///
|
||||
/// The default embedder is determined as follows:
|
||||
///
|
||||
/// - If there is only one embedder, it is always the default.
|
||||
/// - If there are multiple embedders and one of them is called `default`, then that one is the default embedder.
|
||||
/// - In all other cases, there is no default embedder.
|
||||
pub fn get_default_embedder_name(&self) -> &str {
|
||||
let mut it = self.0.keys();
|
||||
let first_name = it.next();
|
||||
let second_name = it.next();
|
||||
match (first_name, second_name) {
|
||||
(None, _) => "default",
|
||||
(Some(first), None) => first,
|
||||
(Some(_), Some(_)) => "default",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for EmbeddingConfigs {
|
||||
type Item = (String, (Arc<Embedder>, Arc<Prompt>));
|
||||
type Item = (String, (Arc<Embedder>, Arc<Prompt>, bool));
|
||||
|
||||
type IntoIter = std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>)>;
|
||||
type IntoIter =
|
||||
std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>, bool)>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.0.into_iter()
|
||||
|
||||
@@ -32,6 +32,9 @@ pub struct EmbeddingSettings {
|
||||
pub dimensions: Setting<usize>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
pub binary_quantized: Setting<bool>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
pub document_template: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
@@ -85,23 +88,63 @@ pub enum ReindexAction {
|
||||
|
||||
pub enum SettingsDiff {
|
||||
Remove,
|
||||
Reindex { action: ReindexAction, updated_settings: EmbeddingSettings },
|
||||
UpdateWithoutReindex { updated_settings: EmbeddingSettings },
|
||||
Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool },
|
||||
UpdateWithoutReindex { updated_settings: EmbeddingSettings, quantize: bool },
|
||||
}
|
||||
|
||||
pub enum EmbedderAction {
|
||||
WriteBackToDocuments(WriteBackToDocuments),
|
||||
Reindex(ReindexAction),
|
||||
#[derive(Default, Debug)]
|
||||
pub struct EmbedderAction {
|
||||
pub was_quantized: bool,
|
||||
pub is_being_quantized: bool,
|
||||
pub write_back: Option<WriteBackToDocuments>,
|
||||
pub reindex: Option<ReindexAction>,
|
||||
}
|
||||
|
||||
impl EmbedderAction {
|
||||
pub fn is_being_quantized(&self) -> bool {
|
||||
self.is_being_quantized
|
||||
}
|
||||
|
||||
pub fn write_back(&self) -> Option<&WriteBackToDocuments> {
|
||||
self.write_back.as_ref()
|
||||
}
|
||||
|
||||
pub fn reindex(&self) -> Option<&ReindexAction> {
|
||||
self.reindex.as_ref()
|
||||
}
|
||||
|
||||
pub fn with_is_being_quantized(mut self, quantize: bool) -> Self {
|
||||
self.is_being_quantized = quantize;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_write_back(write_back: WriteBackToDocuments, was_quantized: bool) -> Self {
|
||||
Self {
|
||||
was_quantized,
|
||||
is_being_quantized: false,
|
||||
write_back: Some(write_back),
|
||||
reindex: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self {
|
||||
Self { was_quantized, is_being_quantized: false, write_back: None, reindex: Some(reindex) }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct WriteBackToDocuments {
|
||||
pub embedder_id: u8,
|
||||
pub user_provided: RoaringBitmap,
|
||||
}
|
||||
|
||||
impl SettingsDiff {
|
||||
pub fn from_settings(old: EmbeddingSettings, new: Setting<EmbeddingSettings>) -> Self {
|
||||
match new {
|
||||
pub fn from_settings(
|
||||
embedder_name: &str,
|
||||
old: EmbeddingSettings,
|
||||
new: Setting<EmbeddingSettings>,
|
||||
) -> Result<Self, UserError> {
|
||||
let ret = match new {
|
||||
Setting::Set(new) => {
|
||||
let EmbeddingSettings {
|
||||
mut source,
|
||||
@@ -116,6 +159,7 @@ impl SettingsDiff {
|
||||
mut distribution,
|
||||
mut headers,
|
||||
mut document_template_max_bytes,
|
||||
binary_quantized: mut binary_quantize,
|
||||
} = old;
|
||||
|
||||
let EmbeddingSettings {
|
||||
@@ -131,8 +175,17 @@ impl SettingsDiff {
|
||||
distribution: new_distribution,
|
||||
headers: new_headers,
|
||||
document_template_max_bytes: new_document_template_max_bytes,
|
||||
binary_quantized: new_binary_quantize,
|
||||
} = new;
|
||||
|
||||
if matches!(binary_quantize, Setting::Set(true))
|
||||
&& matches!(new_binary_quantize, Setting::Set(false))
|
||||
{
|
||||
return Err(UserError::InvalidDisableBinaryQuantization {
|
||||
embedder_name: embedder_name.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
let mut reindex_action = None;
|
||||
|
||||
// **Warning**: do not use short-circuiting || here, we want all these operations applied
|
||||
@@ -172,6 +225,7 @@ impl SettingsDiff {
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let binary_quantize_changed = binary_quantize.apply(new_binary_quantize);
|
||||
if url.apply(new_url) {
|
||||
match source {
|
||||
// do not regenerate on an url change in OpenAI
|
||||
@@ -231,16 +285,27 @@ impl SettingsDiff {
|
||||
distribution,
|
||||
headers,
|
||||
document_template_max_bytes,
|
||||
binary_quantized: binary_quantize,
|
||||
};
|
||||
|
||||
match reindex_action {
|
||||
Some(action) => Self::Reindex { action, updated_settings },
|
||||
None => Self::UpdateWithoutReindex { updated_settings },
|
||||
Some(action) => Self::Reindex {
|
||||
action,
|
||||
updated_settings,
|
||||
quantize: binary_quantize_changed,
|
||||
},
|
||||
None => Self::UpdateWithoutReindex {
|
||||
updated_settings,
|
||||
quantize: binary_quantize_changed,
|
||||
},
|
||||
}
|
||||
}
|
||||
Setting::Reset => Self::Remove,
|
||||
Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old },
|
||||
}
|
||||
Setting::NotSet => {
|
||||
Self::UpdateWithoutReindex { updated_settings: old, quantize: false }
|
||||
}
|
||||
};
|
||||
Ok(ret)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -486,7 +551,7 @@ impl std::fmt::Display for EmbedderSource {
|
||||
|
||||
impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
fn from(value: EmbeddingConfig) -> Self {
|
||||
let EmbeddingConfig { embedder_options, prompt } = value;
|
||||
let EmbeddingConfig { embedder_options, prompt, quantized } = value;
|
||||
let document_template_max_bytes =
|
||||
Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get());
|
||||
match embedder_options {
|
||||
@@ -507,6 +572,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
response: Setting::NotSet,
|
||||
headers: Setting::NotSet,
|
||||
distribution: Setting::some_or_not_set(distribution),
|
||||
binary_quantized: Setting::some_or_not_set(quantized),
|
||||
},
|
||||
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
||||
url,
|
||||
@@ -527,6 +593,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
response: Setting::NotSet,
|
||||
headers: Setting::NotSet,
|
||||
distribution: Setting::some_or_not_set(distribution),
|
||||
binary_quantized: Setting::some_or_not_set(quantized),
|
||||
},
|
||||
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
||||
embedding_model,
|
||||
@@ -547,6 +614,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
response: Setting::NotSet,
|
||||
headers: Setting::NotSet,
|
||||
distribution: Setting::some_or_not_set(distribution),
|
||||
binary_quantized: Setting::some_or_not_set(quantized),
|
||||
},
|
||||
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
||||
dimensions,
|
||||
@@ -564,6 +632,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
response: Setting::NotSet,
|
||||
headers: Setting::NotSet,
|
||||
distribution: Setting::some_or_not_set(distribution),
|
||||
binary_quantized: Setting::some_or_not_set(quantized),
|
||||
},
|
||||
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
||||
api_key,
|
||||
@@ -586,6 +655,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
response: Setting::Set(response),
|
||||
distribution: Setting::some_or_not_set(distribution),
|
||||
headers: Setting::Set(headers),
|
||||
binary_quantized: Setting::some_or_not_set(quantized),
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -607,8 +677,11 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
||||
response,
|
||||
distribution,
|
||||
headers,
|
||||
binary_quantized,
|
||||
} = value;
|
||||
|
||||
this.quantized = binary_quantized.set();
|
||||
|
||||
if let Some(source) = source.set() {
|
||||
match source {
|
||||
EmbedderSource::OpenAi => {
|
||||
|
||||
Reference in New Issue
Block a user