Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-07-18 04:11:07 +00:00)

Compare commits: update-seg...expose-sta (90 commits)
Commits (SHA1):

af5b1a88a4
d6bd88ce4f
3f1a510069
06a9803544
b2588d8101
62d27172f4
1ab88e10b9
6a4b2516aa
abdc4afcca
75d5c0ae1f
a88554216a
2cf3e1c80a
e1fbfde6c4
27b75ec648
07fdb081a4
ca006e38ec
e26bd87780
c01e498a63
ca6cc4654b
3bd9d2478c
54b15059a0
d35278320e
e172e938e7
02b3d82c60
fd2c95999d
e248d2a1e6
487431a035
b6d450d484
dc949ab46a
7f3e51349e
19acc65ad2
3a3ab17714
eaf57056ca
e340705634
fe17c0f52e
14bc80e3df
bc5663e673
8a941c0241
3412e7fbcf
16037e2169
8f7c8ca7f0
ba75d23bfe
7fbb3bf8e8
500ddc76b5
9066a446a3
eccbcf5130
943f8dba0c
1aa8ed9ef7
f762307838
3e94a90722
abe29772db
c9ac7f2e7e
7e251b43d4
9969f7a638
b17cb56dee
afcd7b9f0c
fc7e817221
0f78703b85
30cf972987
d05d49ffd8
0462ebbe58
2f7a8a4efb
02714ef5ed
52d9cb6e5a
261de888b7
98c811247e
59ecf1cea7
273c6e8c5c
897d25780e
c85d1752dd
8e6ffbfc6f
7c19c072fa
673b6e1dc0
f2d0a59f1d
c78a2fa4f5
5542f1d9f1
ad4d8502b3
7ec4e2a3fb
9fffb8e83d
caa6a7149a
a0082c4df9
b0afe0972e
9ecde41853
685f452fb2
4e4a1ddff7
c22460045c
76bb6d565c
9d3ff11b21
43763eb98a
2a0ece814c
.github/workflows/bench-pr.yml (vendored, 9 changes)

@@ -43,4 +43,11 @@ jobs:
       - name: Run benchmarks on PR ${{ github.event.issue.id }}
         run: |
-          cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" -- ${{ steps.command.outputs.command-arguments }}
+          cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" \
+            --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" \
+            --reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" \
+            -- ${{ steps.command.outputs.command-arguments }} > benchlinks.txt
+
+      - name: Send comment in PR
+        run: |
+          gh pr comment ${{github.event.issue.number}} --body-file benchlinks.txt
@@ -187,8 +187,8 @@ They are JSON files with the following structure (comments are not actually supported):
   },
   // Core of the workload.
   // A list of commands to run sequentially.
-  // A command is a request to the Meilisearch instance that is executed while the profiling runs.
-  "commands": [
+  // Optional: A precommand is a request to the Meilisearch instance that is executed before the profiling runs.
+  "precommands": [
     {
       // Meilisearch route to call. `http://localhost:7700/` will be prepended.
       "route": "indexes/movies/settings",
@@ -224,8 +224,11 @@ They are JSON files with the following structure (comments are not actually supported):
       // - DontWait: run the next command without waiting the response to this one.
      // - WaitForResponse: run the next command as soon as the response from the server is received.
      // - WaitForTask: run the next command once **all** the Meilisearch tasks created up to now have finished processing.
-      "synchronous": "DontWait"
-    },
+      "synchronous": "WaitForTask"
+    }
+  ],
+  // A command is a request to the Meilisearch instance that is executed while the profiling runs.
+  "commands": [
     {
       "route": "indexes/movies/documents",
       "method": "POST",
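Read together, the two hunks above describe the new workload layout: `precommands` are requests sent to the instance before profiling starts (awaited with `WaitForTask` before measurement begins), while `commands` are the requests executed while the profiling runs. A minimal sketch of a workload following that structure; the `PATCH` method and the omitted request bodies are illustrative assumptions, and, as the documentation itself notes, the comments are not actually supported in real workload files:

```json
{
  // Optional: requests executed before the profiling runs,
  // e.g. configuring the index the commands will target.
  "precommands": [
    {
      "route": "indexes/movies/settings",
      "method": "PATCH",            // assumed; the diff does not show this method
      "synchronous": "WaitForTask"  // wait for all tasks before profiling starts
    }
  ],
  // Requests executed while the profiling runs.
  "commands": [
    {
      "route": "indexes/movies/documents",
      "method": "POST",
      "synchronous": "DontWait"     // fire the next command immediately
    }
  ]
}
```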
Cargo.lock (generated, 86 changes)

@@ -378,9 +378,9 @@ dependencies = [

 [[package]]
 name = "arroy"
-version = "0.2.0"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efddeb1e7c32a551cc07ef4c3e181e3cd5478fdaf4f0bd799983171c1f6efe57"
+checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9"
 dependencies = [
  "bytemuck",
  "byteorder",
@@ -500,7 +500,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"

 [[package]]
 name = "benchmarks"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "anyhow",
  "bytes",
@@ -645,7 +645,7 @@ dependencies = [

 [[package]]
 name = "build-info"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "anyhow",
  "time",
@@ -1536,16 +1536,16 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"

 [[package]]
 name = "doxygen-rs"
-version = "0.2.2"
+version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505"
+checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9"
 dependencies = [
  "phf",
 ]

 [[package]]
 name = "dump"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1793,7 +1793,7 @@ dependencies = [

 [[package]]
 name = "file-store"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "faux",
  "tempfile",
@@ -1816,7 +1816,7 @@ dependencies = [

 [[package]]
 name = "filter-parser"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "insta",
  "nom",
@@ -1836,7 +1836,7 @@ dependencies = [

 [[package]]
 name = "flatten-serde-json"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -1954,7 +1954,7 @@ dependencies = [

 [[package]]
 name = "fuzzers"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "arbitrary",
  "clap",
@@ -2262,12 +2262,11 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"

 [[package]]
 name = "heed"
-version = "0.20.0-alpha.9"
+version = "0.20.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934"
+checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd"
 dependencies = [
  "bitflags 2.5.0",
  "bytemuck",
  "byteorder",
  "heed-traits",
  "heed-types",
@@ -2281,15 +2280,15 @@ dependencies = [

 [[package]]
 name = "heed-traits"
-version = "0.20.0-alpha.9"
+version = "0.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298"
+checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff"

 [[package]]
 name = "heed-types"
-version = "0.20.0-alpha.9"
+version = "0.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a"
+checksum = "3cb0d6ba3700c9a57e83c013693e3eddb68a6d9b6781cacafc62a0d992e8ddb3"
 dependencies = [
  "bincode",
  "byteorder",
@@ -2448,7 +2447,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"

 [[package]]
 name = "index-scheduler"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "anyhow",
  "big_s",
@@ -2465,7 +2464,6 @@ dependencies = [
  "meilisearch-auth",
  "meilisearch-types",
  "page_size 0.5.0",
- "puffin",
  "rayon",
  "roaring",
  "serde",
@@ -2644,7 +2642,7 @@ dependencies = [

 [[package]]
 name = "json-depth-checker"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -3189,14 +3187,13 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da"

 [[package]]
 name = "lmdb-master-sys"
-version = "0.1.0"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd"
+checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a"
 dependencies = [
  "cc",
  "doxygen-rs",
  "libc",
  "pkg-config",
 ]

 [[package]]
@@ -3233,12 +3230,6 @@ version = "0.4.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"

-[[package]]
-name = "lz4_flex"
-version = "0.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83"
-
 [[package]]
 name = "macro_rules_attribute"
 version = "0.2.0"
@@ -3281,7 +3272,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "meili-snap"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "insta",
  "md5",
@@ -3290,7 +3281,7 @@ dependencies = [

 [[package]]
 name = "meilisearch"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -3343,7 +3334,6 @@ dependencies = [
  "pin-project-lite",
  "platform-dirs",
  "prometheus",
- "puffin",
  "rand",
  "rayon",
  "regex",
@@ -3383,7 +3373,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-auth"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "base64 0.21.7",
  "enum-iterator",
@@ -3402,7 +3392,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-types"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -3432,7 +3422,7 @@ dependencies = [

 [[package]]
 name = "meilitool"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "anyhow",
  "clap",
@@ -3471,7 +3461,7 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "arroy",
  "big_s",
@@ -3511,7 +3501,6 @@ dependencies = [
  "obkv",
  "once_cell",
  "ordered-float",
- "puffin",
  "rand",
  "rand_pcg",
  "rayon",
@@ -3912,7 +3901,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

 [[package]]
 name = "permissive-json-pointer"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "big_s",
  "serde_json",
@@ -4182,23 +4171,6 @@ version = "2.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"

-[[package]]
-name = "puffin"
-version = "0.16.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76425abd4e1a0ad4bd6995dd974b52f414fca9974171df8e3708b3e660d05a21"
-dependencies = [
- "anyhow",
- "bincode",
- "byteorder",
- "cfg-if",
- "instant",
- "lz4_flex",
- "once_cell",
- "parking_lot",
- "serde",
-]
-
 [[package]]
 name = "pulp"
 version = "0.18.9"
@@ -6080,7 +6052,7 @@ dependencies = [

 [[package]]
 name = "xtask"
-version = "1.8.0"
+version = "1.9.0"
 dependencies = [
  "anyhow",
  "build-info",
Cargo.toml

@@ -22,7 +22,7 @@ members = [
 ]

 [workspace.package]
-version = "1.8.0"
+version = "1.9.0"
 authors = [
  "Quentin de Quelen <quentin@dequelen.me>",
  "Clément Renault <clement@meilisearch.com>",
README.md (16 changes)

@@ -25,7 +25,7 @@

 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

-Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
+[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.

 <p align="center" name="demo">
   <a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
@@ -39,8 +39,8 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
 🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥

 ## ✨ Features

-- **Search-as-you-type:** find search results in less than 50 milliseconds
+- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results
+- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience
 - **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
 - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
 - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
@@ -55,15 +55,15 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f

 ## 📖 Documentation

-You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
+You can consult Meilisearch's documentation at [meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).

 ## 🚀 Getting started

 For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.

-## ⚡ Supercharge your Meilisearch experience
+## 🌍 Supercharge your Meilisearch experience

-Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
+Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Additional features include analytics & monitoring in many regions around the world. No credit card is required.

 ## 🧰 SDKs & integration tools

@@ -85,13 +85,13 @@ Finally, for more in-depth information, refer to our articles explaining fundame

 Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.

-To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
+To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.

 If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.

 ## 📫 Get in touch!

-Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
+Meilisearch is a search engine created by [Meili]([https://www.welcometothejungle.com/en/companies/meilisearch](https://www.meilisearch.com/careers)), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)

 🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
dump/src/reader/mod.rs

@@ -197,6 +197,140 @@ pub(crate) mod test {
     use super::*;
+    use crate::reader::v6::RuntimeTogglableFeatures;
+
+    #[test]
+    fn import_dump_v6_with_vectors() {
+        // dump containing two indexes
+        //
+        // "vector", configured with an embedder
+        // contains:
+        // - one document with an overriden vector,
+        // - one document with a natural vector
+        // - one document with a _vectors map containing one additional embedder name and a natural vector
+        // - one document with a _vectors map containing one additional embedder name and an overriden vector
+        //
+        // "novector", no embedder
+        // contains:
+        // - a document without vector
+        // - a document with a random _vectors field
+        let dump = File::open("tests/assets/v6-with-vectors.dump").unwrap();
+        let mut dump = DumpReader::open(dump).unwrap();
+
+        // top level infos
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
+        insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
+
+        // tasks
+        let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
+        let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
+        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"278f63325ef06ca04d01df98d8207b94");
+        assert_eq!(update_files.len(), 10);
+        assert!(update_files[0].is_none()); // the dump creation
+        assert!(update_files[1].is_none());
+        assert!(update_files[2].is_none());
+        assert!(update_files[3].is_none());
+        assert!(update_files[4].is_none());
+        assert!(update_files[5].is_none());
+        assert!(update_files[6].is_none());
+        assert!(update_files[7].is_none());
+        assert!(update_files[8].is_none());
+        assert!(update_files[9].is_none());
+
+        // indexes
+        let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
+        // the index are not ordered in any way by default
+        indexes.sort_by_key(|index| index.metadata().uid.to_string());
+
+        let mut vector_index = indexes.pop().unwrap();
+        let mut novector_index = indexes.pop().unwrap();
+        assert!(indexes.is_empty());
+
+        // vector
+
+        insta::assert_json_snapshot!(vector_index.metadata(), @r###"
+        {
+          "uid": "vector",
+          "primaryKey": "id",
+          "createdAt": "2024-05-16T15:33:17.240962Z",
+          "updatedAt": "2024-05-16T15:40:55.723052Z"
+        }
+        "###);
+
+        {
+            let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
+            let mut documents = documents.unwrap();
+            assert_eq!(documents.len(), 4);
+
+            documents.sort_by_key(|doc| doc.get("id").unwrap().to_string());
+
+            {
+                let document = documents.pop().unwrap();
+                insta::assert_json_snapshot!(document);
+            }
+
+            {
+                let document = documents.pop().unwrap();
+                insta::assert_json_snapshot!(document);
+            }
+
+            {
+                let document = documents.pop().unwrap();
+                insta::assert_json_snapshot!(document);
+            }
+
+            {
+                let document = documents.pop().unwrap();
+                insta::assert_json_snapshot!(document);
+            }
+        }
+
+        // novector
+
+        insta::assert_json_snapshot!(novector_index.metadata(), @r###"
+        {
+          "uid": "novector",
+          "primaryKey": "id",
+          "createdAt": "2024-05-16T15:33:03.568055Z",
+          "updatedAt": "2024-05-16T15:33:07.530217Z"
+        }
+        "###);
+
+        insta::assert_json_snapshot!(novector_index.settings().unwrap().embedders, @"null");
+
+        {
+            let documents: Result<Vec<_>> = novector_index.documents().unwrap().collect();
+            let mut documents = documents.unwrap();
+            assert_eq!(documents.len(), 2);
+
+            documents.sort_by_key(|doc| doc.get("id").unwrap().to_string());
+
+            {
+                let document = documents.pop().unwrap();
+                insta::assert_json_snapshot!(document, @r###"
+                {
+                  "id": "e1",
+                  "other": "random1",
+                  "_vectors": "toto"
+                }
+                "###);
+            }
+
+            {
+                let document = documents.pop().unwrap();
+                insta::assert_json_snapshot!(document, @r###"
+                {
+                  "id": "e0",
+                  "other": "random0"
+                }
+                "###);
+            }
+        }
+
+        assert_eq!(
+            dump.features().unwrap().unwrap(),
+            RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
+        );
+    }
+
     #[test]
     fn import_dump_v6_experimental() {
         let dump = File::open("tests/assets/v6-with-experimental.dump").unwrap();
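The three new snapshot files below hold the documents this test asserts against, and they illustrate the two `_vectors` encodings the test exercises: an overridden (user-provided) vector is stored as a bare array of floats, while a natural vector generated by an embedder is stored as an object with `embeddings` and `userProvided: false`. In miniature, with hypothetical ids and shortened values:

```json
// Overridden vector: the caller supplied the embedding directly.
{ "id": "doc-a", "_vectors": { "default": [0.2, 0.1, 0.1] } }

// Natural vector: produced by the configured embedder at indexing time.
{ "id": "doc-b", "_vectors": { "default": { "embeddings": [[-0.052, -0.927, 0.144]], "userProvided": false } } }
```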
@@ -0,0 +1,783 @@
+---
+source: dump/src/reader/mod.rs
+expression: document
+---
+{
+  "id": "e3",
+  "desc": "overriden vector + map",
+  "_vectors": {
+    "default": [
+      0.2,
+      0.1,
 [… "0.1" repeated; the "default" array holds 768 entries in total …]
+      0.1
+    ],
+    "toto": [
+      0.1
+    ]
+  }
+}
@@ -0,0 +1,786 @@
+---
+source: dump/src/reader/mod.rs
+expression: document
+---
+{
+  "id": "e2",
+  "desc": "natural vector + map",
+  "_vectors": {
+    "toto": [],
+    "default": {
+      "embeddings": [
+        [
+          -0.05189208313822746,
+          -0.9273212552070618,
+          0.1443813145160675,
+          0.0932632014155388,
 [… remaining floating-point values elided; the embedding holds 768 entries in total …]
+          1.3484878540039063
+        ]
+      ],
+      "userProvided": false
+    }
+  }
+}
@ -0,0 +1,785 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
  "id": "e1",
  "desc": "natural vector",
  "_vectors": {
    "default": {
      "embeddings": [
        [
          -0.2979458272457123,
          -0.5288640856742859,
          -0.019957859069108963,
          -0.18495318293571472,
          0.7429973483085632,
          0.5238497257232666,
          0.432366281747818,
          0.32744166254997253,
          0.0020762972999364138,
          -0.9507834911346436,
          … 757 further embedding values, one per line …
          1.04031240940094
        ]
      ],
      "userProvided": false
    }
  }
}
@ -0,0 +1,780 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
  "id": "e0",
  "desc": "overriden vector",
  "_vectors": {
    "default": [
      0.1,
      … the value 0.1 repeated, 768 entries in all …
      0.1
    ]
  }
}
BIN
dump/tests/assets/v6-with-vectors.dump
Normal file
Binary file not shown.
@ -22,7 +22,6 @@ flate2 = "1.0.28"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = { version = "0.16.0", features = ["serialization"] }
rayon = "1.8.1"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
@ -31,6 +31,9 @@ use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
    IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
use meilisearch_types::milli::vector::parsed_vectors::{
    ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
};
use meilisearch_types::milli::{self, Filter};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
@ -526,8 +529,6 @@ impl IndexScheduler {
        #[cfg(test)]
        self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;

        puffin::profile_function!();

        let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
        let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
@ -636,8 +637,6 @@ impl IndexScheduler {
            self.breakpoint(crate::Breakpoint::InsideProcessBatch);
        }

        puffin::profile_function!(batch.to_string());

        match batch {
            Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
                // 1. Retrieve the tasks that matched the query at enqueue-time.
@ -785,10 +784,12 @@ impl IndexScheduler {
                let dst = temp_snapshot_dir.path().join("auth");
                fs::create_dir_all(&dst)?;
                // TODO We can't use the open_auth_store_env function here but we should
                let auth = milli::heed::EnvOpenOptions::new()
                    .map_size(1024 * 1024 * 1024) // 1 GiB
                    .max_dbs(2)
                    .open(&self.auth_path)?;
                let auth = unsafe {
                    milli::heed::EnvOpenOptions::new()
                        .map_size(1024 * 1024 * 1024) // 1 GiB
                        .max_dbs(2)
                        .open(&self.auth_path)
                }?;
                auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;

                // 5. Copy and tarball the flat snapshot
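A note for readers following this hunk: newer heed releases (0.20 and up) make `EnvOpenOptions::open` an `unsafe fn`, because the memory-mapped file must not be modified by another process while the environment is alive, hence the new `unsafe` block. A minimal sketch of the pattern under that assumption (`open_auth_env` is a hypothetical helper, not part of this diff):

use std::path::Path;

use meilisearch_types::milli::heed; // assumed re-export, as used above

fn open_auth_env(path: &Path) -> heed::Result<heed::Env> {
    // SAFETY: assumed — the caller guarantees nothing else touches `path`
    // while the returned environment is open.
    unsafe {
        heed::EnvOpenOptions::new()
            .map_size(1024 * 1024 * 1024) // 1 GiB, matching the hunk above
            .max_dbs(2)
            .open(path)
    }
}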
@ -914,8 +915,55 @@ impl IndexScheduler {
                if self.must_stop_processing.get() {
                    return Err(Error::AbortedTask);
                }
                let (_id, doc) = ret?;
                let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;

                let (id, doc) = ret?;

                let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;

                'inject_vectors: {
                    let embeddings = index.embeddings(&rtxn, id)?;

                    if embeddings.is_empty() {
                        break 'inject_vectors;
                    }

                    let vectors = document
                        .entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
                        .or_insert(serde_json::Value::Object(Default::default()));

                    let serde_json::Value::Object(vectors) = vectors else {
                        return Err(milli::Error::UserError(
                            milli::UserError::InvalidVectorsMapType {
                                document_id: {
                                    if let Ok(Some(Ok(index))) = index
                                        .external_id_of(&rtxn, std::iter::once(id))
                                        .map(|it| it.into_iter().next())
                                    {
                                        index
                                    } else {
                                        format!("internal docid={id}")
                                    }
                                },
                                value: vectors.clone(),
                            },
                        )
                        .into());
                    };

                    for (embedder_name, embeddings) in embeddings {
                        // don't change the entry if it already exists, because it was user-provided
                        vectors.entry(embedder_name).or_insert_with(|| {
                            let embeddings = ExplicitVectors {
                                embeddings: VectorOrArrayOfVectors::from_array_of_vectors(
                                    embeddings,
                                ),
                                user_provided: false,
                            };
                            serde_json::to_value(embeddings).unwrap()
                        });
                    }
                }

                index_dumper.push_document(&document)?;
            }
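To make the injected shape concrete, this is roughly what a dumped document carries after the loop above runs; `ExplicitVectors { embeddings, user_provided }` serializes in camelCase, and the snapshot files earlier in this compare show real instances. A sketch with illustrative values:

use serde_json::json;

fn main() {
    // Each embedder name becomes a key under `_vectors`; regenerated entries
    // are tagged `userProvided: false`, user-supplied ones keep their entry.
    let document = json!({
        "id": "e1",
        "desc": "natural vector",
        "_vectors": {
            "default": {
                "embeddings": [[-0.297, -0.528, -0.019]], // one row per embedding
                "userProvided": false
            }
        }
    });
    println!("{document:#}");
}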
@ -1174,8 +1222,6 @@ impl IndexScheduler {
        index: &'i Index,
        operation: IndexOperation,
    ) -> Result<Vec<Task>> {
        puffin::profile_function!();

        match operation {
            IndexOperation::DocumentClear { mut tasks, .. } => {
                let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
@ -68,19 +68,6 @@ impl RoFeatures {
            .into())
        }
    }

    pub fn check_puffin(&self) -> Result<()> {
        if self.runtime.export_puffin_reports {
            Ok(())
        } else {
            Err(FeatureNotEnabledError {
                disabled_action: "Outputting Puffin reports to disk",
                feature: "export puffin reports",
                issue_link: "https://github.com/meilisearch/product/discussions/693",
            }
            .into())
        }
    }
}

impl FeatureData {
@ -32,7 +32,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        features: _,
        max_number_of_tasks: _,
        max_number_of_batched_tasks: _,
        puffin_frame: _,
        wake_up: _,
        dumps_path: _,
        snapshots_path: _,
@ -33,7 +33,6 @@ pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;

use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{self, BufReader, Read};
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
@ -59,7 +58,6 @@ use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfi
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use puffin::FrameView;
use rayon::current_num_threads;
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap;
@ -344,9 +342,6 @@ pub struct IndexScheduler {
    /// The Authorization header to send to the webhook URL.
    pub(crate) webhook_authorization_header: Option<String>,

    /// A frame to output the indexation profiling files to disk.
    pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,

    /// The path used to create the dumps.
    pub(crate) dumps_path: PathBuf,
@ -401,7 +396,6 @@ impl IndexScheduler {
            cleanup_enabled: self.cleanup_enabled,
            max_number_of_tasks: self.max_number_of_tasks,
            max_number_of_batched_tasks: self.max_number_of_batched_tasks,
            puffin_frame: self.puffin_frame.clone(),
            snapshots_path: self.snapshots_path.clone(),
            dumps_path: self.dumps_path.clone(),
            auth_path: self.auth_path.clone(),
@ -453,10 +447,12 @@ impl IndexScheduler {
            )
        };

        let env = heed::EnvOpenOptions::new()
            .max_dbs(11)
            .map_size(budget.task_db_size)
            .open(options.tasks_path)?;
        let env = unsafe {
            heed::EnvOpenOptions::new()
                .max_dbs(11)
                .map_size(budget.task_db_size)
                .open(options.tasks_path)
        }?;

        let features = features::FeatureData::new(&env, options.instance_features)?;
@ -498,7 +494,6 @@ impl IndexScheduler {
            env,
            // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
            wake_up: Arc::new(SignalEvent::auto(true)),
            puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
            autobatching_enabled: options.autobatching_enabled,
            cleanup_enabled: options.cleanup_enabled,
            max_number_of_tasks: options.max_number_of_tasks,
@ -585,9 +580,9 @@ impl IndexScheduler {
    }

    fn is_good_heed(tasks_path: &Path, map_size: usize) -> bool {
        if let Ok(env) =
        if let Ok(env) = unsafe {
            heed::EnvOpenOptions::new().map_size(clamp_to_page_size(map_size)).open(tasks_path)
        {
        } {
            env.prepare_for_closing().wait();
            true
        } else {
@ -619,10 +614,6 @@ impl IndexScheduler {
                run.wake_up.wait();

                loop {
                    let puffin_enabled = run.features().check_puffin().is_ok();
                    puffin::set_scopes_on(puffin_enabled);
                    puffin::GlobalProfiler::lock().new_frame();

                    match run.tick() {
                        Ok(TickOutcome::TickAgain(_)) => (),
                        Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
@ -634,31 +625,6 @@ impl IndexScheduler {
                        }
                    }
                }

                // Let's write the previous frame to disk but only if
                // the user wanted to profile with puffin.
                if puffin_enabled {
                    let mut frame_view = run.puffin_frame.lock();
                    if !frame_view.is_empty() {
                        let now = OffsetDateTime::now_utc();
                        let mut file = match File::create(format!("{}.puffin", now)) {
                            Ok(file) => file,
                            Err(e) => {
                                tracing::error!("{e}");
                                continue;
                            }
                        };
                        if let Err(e) = frame_view.save_to_writer(&mut file) {
                            tracing::error!("{e}");
                        }
                        if let Err(e) = file.sync_all() {
                            tracing::error!("{e}");
                        }
                        // We erase this frame view as it is no more useful. We want to
                        // measure the new frames now that we exported the previous ones.
                        *frame_view = FrameView::default();
                    }
                }
            })
            .unwrap();
@ -1772,6 +1738,7 @@ mod tests {
    use big_s::S;
    use crossbeam::channel::RecvTimeoutError;
    use file_store::File;
    use insta::assert_json_snapshot;
    use meili_snap::{json_string, snapshot};
    use meilisearch_auth::AuthFilter;
    use meilisearch_types::document_formats::DocumentFormatError;
@ -1849,7 +1816,7 @@ mod tests {

        // To be 100% consistent between all test we're going to start the scheduler right now
        // and ensure it's in the expected starting state.
        let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(1)) {
        let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(10)) {
            Ok(b) => b,
            Err(RecvTimeoutError::Timeout) => {
                panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.")
@ -1960,7 +1927,7 @@ mod tests {
        fn advance(&mut self) -> Breakpoint {
            let (breakpoint_1, b) = match self
                .test_breakpoint_rcv
                .recv_timeout(std::time::Duration::from_secs(5))
                .recv_timeout(std::time::Duration::from_secs(50))
            {
                Ok(b) => b,
                Err(RecvTimeoutError::Timeout) => {
@ -1981,7 +1948,7 @@ mod tests {

            let (breakpoint_2, b) = match self
                .test_breakpoint_rcv
                .recv_timeout(std::time::Duration::from_secs(5))
                .recv_timeout(std::time::Duration::from_secs(50))
            {
                Ok(b) => b,
                Err(RecvTimeoutError::Timeout) => {
@ -4980,4 +4947,233 @@ mod tests {
        ----------------------------------------------------------------------
        "###);
    }

    #[test]
    fn import_vectors() {
        use meilisearch_types::settings::{Settings, Unchecked};
        use milli::update::Setting;

        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);

        let mut new_settings: Box<Settings<Unchecked>> = Box::default();
        let mut embedders = BTreeMap::default();
        let embedding_settings = milli::vector::settings::EmbeddingSettings {
            source: Setting::Set(milli::vector::settings::EmbedderSource::Rest),
            api_key: Setting::Set(S("My super secret")),
            url: Setting::Set(S("http://localhost:7777")),
            dimensions: Setting::Set(384),
            ..Default::default()
        };
        embedders.insert(S("A_fakerest"), Setting::Set(embedding_settings));

        let embedding_settings = milli::vector::settings::EmbeddingSettings {
            source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
            model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
            revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
            document_template: Setting::Set(S("{{doc.doggo}} the {{doc.breed}} best doggo")),
            ..Default::default()
        };
        embedders.insert(S("B_small_hf"), Setting::Set(embedding_settings));

        new_settings.embedders = Setting::Set(embedders);

        index_scheduler
            .register(
                KindWithContent::SettingsUpdate {
                    index_uid: S("doggos"),
                    new_settings,
                    is_deletion: false,
                    allow_index_creation: true,
                },
                None,
                false,
            )
            .unwrap();
        index_scheduler.assert_internally_consistent();

        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors");

        {
            let rtxn = index_scheduler.read_txn().unwrap();
            let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap();
            let task = meilisearch_types::task_view::TaskView::from_task(&task);
            insta::assert_json_snapshot!(task.details);
        }

        handle.advance_n_successful_batches(1);
        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors");

        {
            let rtxn = index_scheduler.read_txn().unwrap();
            let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap();
            let task = meilisearch_types::task_view::TaskView::from_task(&task);
            insta::assert_json_snapshot!(task.details);
        }

        let (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) = {
            let index = index_scheduler.index("doggos").unwrap();
            let rtxn = index.read_txn().unwrap();

            let configs = index.embedding_configs(&rtxn).unwrap();
            // for consistency with the below
            #[allow(clippy::get_first)]
            let (name, fakerest_config) = configs.get(0).unwrap();
            insta::assert_json_snapshot!(name, @r###""A_fakerest""###);
            insta::assert_json_snapshot!(fakerest_config.embedder_options);
            let fakerest_name = name.clone();

            let (name, simple_hf_config) = configs.get(1).unwrap();
            insta::assert_json_snapshot!(name, @r###""B_small_hf""###);
            insta::assert_json_snapshot!(simple_hf_config.embedder_options);
            let simple_hf_name = name.clone();

            let configs = index_scheduler.embedders(configs).unwrap();
            let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap();
            let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
            let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
            let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
            (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
        };

        // add one doc, specifying vectors

        let doc = serde_json::json!(
            {
                "id": 0,
                "doggo": "Intel",
                "breed": "beagle",
                "_vectors": {
                    &fakerest_name: {
                        // this will never trigger regeneration, which is good because we can't actually generate with
                        // this embedder
                        "userProvided": true,
                        "embeddings": beagle_embed,
                    },
                    &simple_hf_name: {
                        // this will be regenerated on updates
                        "userProvided": false,
                        "embeddings": lab_embed,
                    },
                    "noise": [0.1, 0.2, 0.3]
                }
            }
        );

        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0u128).unwrap();
        let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap();
        assert_eq!(documents_count, 1);
        file.persist().unwrap();

        index_scheduler
            .register(
                KindWithContent::DocumentAdditionOrUpdate {
                    index_uid: S("doggos"),
                    primary_key: Some(S("id")),
                    method: UpdateDocuments,
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
                },
                None,
                false,
            )
            .unwrap();
        index_scheduler.assert_internally_consistent();

        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel");

        handle.advance_one_successful_batch();

        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "adding Intel succeeds");

        // check embeddings
        {
            let index = index_scheduler.index("doggos").unwrap();
            let rtxn = index.read_txn().unwrap();

            let embeddings = index.embeddings(&rtxn, 0).unwrap();

            assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
            assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");

            let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
            let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
            let doc = obkv_to_json(
                &[
                    fields_ids_map.id("doggo").unwrap(),
                    fields_ids_map.id("breed").unwrap(),
                    fields_ids_map.id("_vectors").unwrap(),
                ],
                &fields_ids_map,
                doc,
            )
            .unwrap();
            assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"});
        }

        // update the doc, specifying vectors

        let doc = serde_json::json!(
            {
                "id": 0,
                "doggo": "kefir",
                "breed": "patou",
            }
        );

        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1u128).unwrap();
        let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap();
        assert_eq!(documents_count, 1);
        file.persist().unwrap();

        index_scheduler
            .register(
                KindWithContent::DocumentAdditionOrUpdate {
                    index_uid: S("doggos"),
                    primary_key: None,
                    method: UpdateDocuments,
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
                },
                None,
                false,
            )
            .unwrap();
        index_scheduler.assert_internally_consistent();

        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");

        handle.advance_one_successful_batch();
        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds");

        {
            // check embeddings
            {
                let index = index_scheduler.index("doggos").unwrap();
                let rtxn = index.read_txn().unwrap();

                let embeddings = index.embeddings(&rtxn, 0).unwrap();

                // automatically changed to patou
                assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
                // remained beagle because set to userProvided
                assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");

                let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
                let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
                let doc = obkv_to_json(
                    &[
                        fields_ids_map.id("doggo").unwrap(),
                        fields_ids_map.id("breed").unwrap(),
                        fields_ids_map.id("_vectors").unwrap(),
                    ],
                    &fields_ids_map,
                    doc,
                )
                .unwrap();
                assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"});
            }
        }
    }
}
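For reference, the embedder configuration this test registers corresponds roughly to the following JSON settings payload; the field names follow the `task.details` snapshots just below, while sending it to `PATCH /indexes/doggos/settings` is an assumption about usage rather than something this diff shows:

use serde_json::json;

fn main() {
    let settings = json!({
        "embedders": {
            "A_fakerest": {
                "source": "rest",
                "apiKey": "My super secret",
                "dimensions": 384,
                "url": "http://localhost:7777"
            },
            "B_small_hf": {
                "source": "huggingFace",
                "model": "sentence-transformers/all-MiniLM-L6-v2",
                "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
                "documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo"
            }
        }
    });
    println!("{settings:#}");
}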
@ -0,0 +1,19 @@
---
source: index-scheduler/src/lib.rs
expression: doc
---
{
  "doggo": "kefir",
  "breed": "patou",
  "_vectors": {
    "A_fakerest": {
      "embeddings": "[vector]",
      "userProvided": true
    },
    "noise": [
      0.1,
      0.2,
      0.3
    ]
  }
}
@ -0,0 +1,20 @@
---
source: index-scheduler/src/lib.rs
expression: task.details
---
{
  "embedders": {
    "A_fakerest": {
      "source": "rest",
      "apiKey": "MyXXXX...",
      "dimensions": 384,
      "url": "http://localhost:7777"
    },
    "B_small_hf": {
      "source": "huggingFace",
      "model": "sentence-transformers/all-MiniLM-L6-v2",
      "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
      "documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo"
    }
  }
}
@ -0,0 +1,23 @@
---
source: index-scheduler/src/lib.rs
expression: fakerest_config.embedder_options
---
{
  "Rest": {
    "api_key": "My super secret",
    "distribution": null,
    "dimensions": 384,
    "url": "http://localhost:7777",
    "query": null,
    "input_field": [
      "input"
    ],
    "path_to_embeddings": [
      "data"
    ],
    "embedding_object": [
      "embedding"
    ],
    "input_type": "text"
  }
}
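These options imply a request/response wire format along the following lines; this is a hedged sketch, not an API reference: `input_field: ["input"]` and `input_type: "text"` shape the request body, while `path_to_embeddings: ["data"]` and `embedding_object: ["embedding"]` describe where each vector sits in the response.

use serde_json::json;

fn main() {
    // Hypothetical exchange with the embedding server at `url`.
    let request = json!({ "input": "Intel the beagle best doggo" });
    let response = json!({
        "data": [
            { "embedding": [0.1, 0.2, 0.3] } // 384 values when `dimensions: 384`
        ]
    });
    let _ = (request, response);
}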
@ -0,0 +1,11 @@
---
source: index-scheduler/src/lib.rs
expression: simple_hf_config.embedder_options
---
{
  "HuggingFace": {
    "model": "sentence-transformers/all-MiniLM-L6-v2",
    "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
    "distribution": null
  }
}
@ -0,0 +1,19 @@
---
source: index-scheduler/src/lib.rs
expression: doc
---
{
  "doggo": "Intel",
  "breed": "beagle",
  "_vectors": {
    "A_fakerest": {
      "embeddings": "[vector]",
      "userProvided": true
    },
    "noise": [
      0.1,
      0.2,
      0.3
    ]
  }
}
@ -0,0 +1,20 @@
---
source: index-scheduler/src/lib.rs
expression: task.details
---
{
  "embedders": {
    "A_fakerest": {
      "source": "rest",
      "apiKey": "MyXXXX...",
      "dimensions": 384,
      "url": "http://localhost:7777"
    },
    "B_small_hf": {
      "source": "huggingFace",
      "model": "sentence-transformers/all-MiniLM-L6-v2",
      "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
      "documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo"
    }
  }
}
@ -0,0 +1,49 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,2,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------
@ -0,0 +1,48 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [2,]
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,2,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001

----------------------------------------------------------------------

@ -0,0 +1,45 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@ -0,0 +1,44 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------

@ -0,0 +1,36 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@ -0,0 +1,40 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@ -272,9 +272,9 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
}
for index_uid in index_uids {
    if index_uid == swap.0 {
-       *index_uid = swap.1.to_owned();
+       swap.1.clone_into(index_uid);
    } else if index_uid == swap.1 {
-       *index_uid = swap.0.to_owned();
+       swap.0.clone_into(index_uid);
    }
}
}

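The `to_owned` → `clone_into` change follows clippy's `assigning_clones` lint: `clone_into` copies into the destination `String`'s existing allocation instead of building a fresh one and dropping the old. A minimal standalone sketch of the difference (illustrative values, not meilisearch code):

fn main() {
    let src = "movies_new";
    let mut dst = String::with_capacity(32);
    dst.push_str("movies");

    // Allocates a brand-new String, then drops dst's old buffer.
    dst = src.to_owned();

    // Copies into dst's existing buffer, growing it only if needed.
    src.clone_into(&mut dst);

    assert_eq!(dst, "movies_new");
}
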
@ -49,7 +49,7 @@ pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env>
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2);
-   options.open(path)
+   unsafe { options.open(path) }
}

impl HeedAuthStore {

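Wrapping the call in `unsafe` tracks heed 0.20, where `EnvOpenOptions::open` became an unsafe fn because opening the same LMDB environment twice in one process is undefined behavior. A hedged sketch of the call-site shape (assuming heed 0.20's API and an illustrative 1 GiB map size):

use std::path::Path;

use milli::heed::{self, EnvOpenOptions};

fn open_env(path: &Path) -> heed::Result<heed::Env> {
    let mut options = EnvOpenOptions::new();
    options.map_size(1024 * 1024 * 1024); // 1 GiB, mirroring AUTH_STORE_SIZE
    options.max_dbs(2);
    // SAFETY: the caller must guarantee this environment is not already
    // opened elsewhere in the process (LMDB forbids double-opens).
    unsafe { options.open(path) }
}
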
@ -189,3 +189,4 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
merge_with_error_impl_take_error_message!(InvalidSimilarId);

@ -239,18 +239,23 @@ InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
InvalidSimilarFilter , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ;
InvalidSimilarLimit , InvalidRequest , BAD_REQUEST ;
InvalidSearchLimit , InvalidRequest , BAD_REQUEST ;
InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSimilarOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
@ -259,7 +264,9 @@ InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
@ -322,7 +329,8 @@ UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;

// Experimental features
-VectorEmbeddingError , InvalidRequest , BAD_REQUEST
+VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
+NotFoundSimilarId , InvalidRequest , BAD_REQUEST
}

impl ErrorCode for JoinError {
@ -384,7 +392,6 @@ impl ErrorCode for milli::Error {
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError(_) => Code::InvalidSearchSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
@ -423,7 +430,6 @@ impl ErrorCode for HeedError {
HeedError::Mdb(_)
| HeedError::Encoding(_)
| HeedError::Decoding(_)
| HeedError::InvalidDatabaseTyping
| HeedError::DatabaseClosing
| HeedError::BadOpenOptions { .. } => Code::Internal,
}
@ -488,6 +494,17 @@ impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
}
}

impl fmt::Display for deserr_codes::InvalidSimilarId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "the value of `id` is invalid. \
            A document identifier can be of type integer or string, \
            only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."
        )
    }
}

#[macro_export]
macro_rules! internal_error {
    ($target:ty : $($other:path), *) => {
@ -6,7 +6,6 @@ pub struct RuntimeTogglableFeatures {
pub vector_store: bool,
pub metrics: bool,
pub logs_route: bool,
-pub export_puffin_reports: bool,
}

#[derive(Default, Debug, Clone, Copy)]
@ -67,7 +67,6 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.13"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
-puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.8.0"
regex = "1.10.2"
@ -25,6 +25,18 @@ impl SearchAggregator {
pub fn succeed(&mut self, _: &dyn Any) {}
}

#[derive(Default)]
pub struct SimilarAggregator;

#[allow(dead_code)]
impl SimilarAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
        Self
    }

    pub fn succeed(&mut self, _: &dyn Any) {}
}

#[derive(Default)]
pub struct MultiSearchAggregator;

||||
@ -66,6 +78,8 @@ impl Analytics for MockAnalytics {
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn get_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(
@ -22,6 +22,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(not(feature = "analytics"))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type SimilarAggregator = mock_analytics::SimilarAggregator;
#[cfg(not(feature = "analytics"))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
@ -32,6 +34,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(feature = "analytics")]
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(feature = "analytics")]
pub type SimilarAggregator = segment_analytics::SimilarAggregator;
#[cfg(feature = "analytics")]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(feature = "analytics")]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
@ -86,6 +90,12 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a post search
fn post_search(&self, aggregate: SearchAggregator);

/// This method should be called to aggregate a get similar request
fn get_similar(&self, aggregate: SimilarAggregator);

/// This method should be called to aggregate a post similar request
fn post_similar(&self, aggregate: SimilarAggregator);

/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);

@ -36,8 +36,9 @@ use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
    FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
-   DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
-   DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO,
+   SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
+   DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
+   DEFAULT_SEMANTIC_RATIO,
};
use crate::Opt;

@ -73,6 +74,8 @@ pub enum AnalyticsMsg {
BatchMessage(Track),
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregateGetSimilar(SimilarAggregator),
AggregatePostSimilar(SimilarAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregatePostFacetSearch(FacetSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
@ -149,6 +152,8 @@ impl SegmentAnalytics {
update_documents_aggregator: DocumentsAggregator::default(),
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
get_similar_aggregator: SimilarAggregator::default(),
post_similar_aggregator: SimilarAggregator::default(),
});
tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));

@ -184,6 +189,14 @@ impl super::Analytics for SegmentAnalytics {
    let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}

fn get_similar(&self, aggregate: SimilarAggregator) {
    let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSimilar(aggregate));
}

fn post_similar(&self, aggregate: SimilarAggregator) {
    let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSimilar(aggregate));
}

fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
    let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate));
}
@ -379,6 +392,8 @@ pub struct Segment {
update_documents_aggregator: DocumentsAggregator,
get_fetch_documents_aggregator: DocumentsFetchAggregator,
post_fetch_documents_aggregator: DocumentsFetchAggregator,
get_similar_aggregator: SimilarAggregator,
post_similar_aggregator: SimilarAggregator,
}

impl Segment {
@ -441,6 +456,8 @@ impl Segment {
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSimilar(agreg)) => self.post_similar_aggregator.aggregate(agreg),
None => (),
}
}
@ -494,6 +511,8 @@ impl Segment {
update_documents_aggregator,
get_fetch_documents_aggregator,
post_fetch_documents_aggregator,
get_similar_aggregator,
post_similar_aggregator,
} = self;

if let Some(get_search) =
@ -541,6 +560,18 @@ impl Segment {
{
    let _ = self.batcher.push(post_fetch_documents).await;
}

if let Some(get_similar_documents) =
    take(get_similar_aggregator).into_event(user, "Similar GET")
{
    let _ = self.batcher.push(get_similar_documents).await;
}

if let Some(post_similar_documents) =
    take(post_similar_aggregator).into_event(user, "Similar POST")
{
    let _ = self.batcher.push(post_similar_documents).await;
}
let _ = self.batcher.flush().await;
}
}
@ -1558,3 +1589,235 @@ impl DocumentsFetchAggregator {
    })
}
}

#[derive(Default)]
pub struct SimilarAggregator {
    timestamp: Option<OffsetDateTime>,

    // context
    user_agents: HashSet<String>,

    // requests
    total_received: usize,
    total_succeeded: usize,
    time_spent: BinaryHeap<usize>,

    // filter
    filter_with_geo_radius: bool,
    filter_with_geo_bounding_box: bool,
    // every time a request has a filter, this field must be incremented by the number of terms it contains
    filter_sum_of_criteria_terms: usize,
    // every time a request has a filter, this field must be incremented by one
    filter_total_number_of_criteria: usize,
    used_syntax: HashMap<String, usize>,

    // Whether a non-default embedder was specified
    embedder: bool,

    // pagination
    max_limit: usize,
    max_offset: usize,

    // formatting
    max_attributes_to_retrieve: usize,

    // scoring
    show_ranking_score: bool,
    show_ranking_score_details: bool,
}

impl SimilarAggregator {
    #[allow(clippy::field_reassign_with_default)]
    pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self {
        let SimilarQuery {
            id: _,
            embedder,
            offset,
            limit,
            attributes_to_retrieve: _,
            show_ranking_score,
            show_ranking_score_details,
            filter,
        } = query;

        let mut ret = Self::default();
        ret.timestamp = Some(OffsetDateTime::now_utc());

        ret.total_received = 1;
        ret.user_agents = extract_user_agents(request).into_iter().collect();

        if let Some(ref filter) = filter {
            static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
            ret.filter_total_number_of_criteria = 1;

            let syntax = match filter {
                Value::String(_) => "string".to_string(),
                Value::Array(values) => {
                    if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
                        "mixed".to_string()
                    } else {
                        "array".to_string()
                    }
                }
                _ => "none".to_string(),
            };
            // record which filter syntax was used in the HashMap counter
            ret.used_syntax.insert(syntax, 1);

            let stringified_filters = filter.to_string();
            ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
            ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
            ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
        }

        ret.max_limit = *limit;
        ret.max_offset = *offset;

        ret.show_ranking_score = *show_ranking_score;
        ret.show_ranking_score_details = *show_ranking_score_details;

        ret.embedder = embedder.is_some();

        ret
    }

    pub fn succeed(&mut self, result: &SimilarResult) {
        let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result;

        self.total_succeeded = self.total_succeeded.saturating_add(1);

        self.time_spent.push(*processing_time_ms as usize);
    }

    /// Aggregate one [SimilarAggregator] into another.
    pub fn aggregate(&mut self, mut other: Self) {
        let Self {
            timestamp,
            user_agents,
            total_received,
            total_succeeded,
            ref mut time_spent,
            filter_with_geo_radius,
            filter_with_geo_bounding_box,
            filter_sum_of_criteria_terms,
            filter_total_number_of_criteria,
            used_syntax,
            max_limit,
            max_offset,
            max_attributes_to_retrieve,
            show_ranking_score,
            show_ranking_score_details,
            embedder,
        } = other;

        if self.timestamp.is_none() {
            self.timestamp = timestamp;
        }

        // context
        for user_agent in user_agents.into_iter() {
            self.user_agents.insert(user_agent);
        }

        // request
        self.total_received = self.total_received.saturating_add(total_received);
        self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
        self.time_spent.append(time_spent);

        // filter
        self.filter_with_geo_radius |= filter_with_geo_radius;
        self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
        self.filter_sum_of_criteria_terms =
            self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
        self.filter_total_number_of_criteria =
            self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
        for (key, value) in used_syntax.into_iter() {
            let used_syntax = self.used_syntax.entry(key).or_insert(0);
            *used_syntax = used_syntax.saturating_add(value);
        }

        self.embedder |= embedder;

        // pagination
        self.max_limit = self.max_limit.max(max_limit);
        self.max_offset = self.max_offset.max(max_offset);

        // formatting
        self.max_attributes_to_retrieve =
            self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);

        // scoring
        self.show_ranking_score |= show_ranking_score;
        self.show_ranking_score_details |= show_ranking_score_details;
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        let Self {
            timestamp,
            user_agents,
            total_received,
            total_succeeded,
            time_spent,
            filter_with_geo_radius,
            filter_with_geo_bounding_box,
            filter_sum_of_criteria_terms,
            filter_total_number_of_criteria,
            used_syntax,
            max_limit,
            max_offset,
            max_attributes_to_retrieve,
            show_ranking_score,
            show_ranking_score_details,
            embedder,
        } = self;

        if total_received == 0 {
            None
        } else {
            // we get all the values in a sorted manner
            let time_spent = time_spent.into_sorted_vec();
            // the index of the 99th percentile value
            let percentile_99th = time_spent.len() * 99 / 100;
            // we are only interested in the slowest value of the 99% fastest results
            let time_spent = time_spent.get(percentile_99th);

            let properties = json!({
                "user-agent": user_agents,
                "requests": {
                    "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
                    "total_succeeded": total_succeeded,
                    "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic
                    "total_received": total_received,
                },
                "filter": {
                    "with_geoRadius": filter_with_geo_radius,
                    "with_geoBoundingBox": filter_with_geo_bounding_box,
                    "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
                    "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
                },
                "hybrid": {
                    "embedder": embedder,
                },
                "pagination": {
                    "max_limit": max_limit,
                    "max_offset": max_offset,
                },
                "formatting": {
                    "max_attributes_to_retrieve": max_attributes_to_retrieve,
                },
                "scoring": {
                    "show_ranking_score": show_ranking_score,
                    "show_ranking_score_details": show_ranking_score_details,
                },
            });

            Some(Track {
                timestamp,
                user: user.clone(),
                event: event_name.to_string(),
                properties,
                ..Default::default()
            })
        }
    }
}

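The 99th-percentile computation above is simple enough to check by hand: sort the response times ascending, then index at `len * 99 / 100`. A standalone sketch of the same calculation (illustrative timings):

use std::collections::BinaryHeap;

fn main() {
    // Response times in milliseconds, collected out of order.
    let time_spent: BinaryHeap<usize> = [120, 5, 30, 8, 300, 42, 7, 15, 60, 9].into();

    // Same steps as the aggregator: sort ascending, index at len * 99 / 100.
    let sorted = time_spent.into_sorted_vec();
    let percentile_99th = sorted.len() * 99 / 100; // 10 * 99 / 100 = 9
    assert_eq!(sorted.get(percentile_99th), Some(&300));
}
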
@ -47,8 +47,6 @@ pub struct RuntimeTogglableFeatures {
pub metrics: Option<bool>,
#[deserr(default)]
pub logs_route: Option<bool>,
-#[deserr(default)]
-pub export_puffin_reports: Option<bool>,
}

async fn patch_features(
@ -68,21 +66,13 @@ async fn patch_features(
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
-export_puffin_reports: new_features
-    .0
-    .export_puffin_reports
-    .unwrap_or(old_features.export_puffin_reports),
};

// explicitly destructure for analytics rather than using the `Serialize` implementation, because
// it renames to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
-let meilisearch_types::features::RuntimeTogglableFeatures {
-    vector_store,
-    metrics,
-    logs_route,
-    export_puffin_reports,
-} = new_features;
+let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, logs_route } =
+    new_features;

analytics.publish(
    "Experimental features Updated".to_string(),
@ -90,7 +80,6 @@ async fn patch_features(
"vector_store": vector_store,
"metrics": metrics,
"logs_route": logs_route,
-"export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);

@ -69,7 +69,7 @@ pub async fn search(

// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
-    add_search_rules(&mut search_query, search_rules);
+    add_search_rules(&mut search_query.filter, search_rules);
}

let index = index_scheduler.index(&index_uid)?;

@ -29,6 +29,7 @@ pub mod documents;
pub mod facet_search;
pub mod search;
pub mod settings;
pub mod similar;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(

|
        .route(web::delete().to(SeqHandler(delete_index))),
)
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(
    web::resource("/advanced-stats")
        .route(web::get().to(SeqHandler(get_advanced_index_stats))),
)
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
.service(web::scope("/similar").configure(similar::configure))
.service(web::scope("/settings").configure(settings::configure)),
);
}

@ -276,3 +282,16 @@ pub async fn get_index_stats(
debug!(returns = ?stats, "Get index stats");
Ok(HttpResponse::Ok().json(stats))
}

pub async fn get_advanced_index_stats(
    index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
    let index = index_scheduler.index(&index_uid)?;
    let rtxn = index.read_txn()?;
    let advanced_stats = index.advanced_stats(&rtxn)?;

    debug!(returns = ?advanced_stats, "Get advanced index stats");
    Ok(HttpResponse::Ok().json(advanced_stats))
}

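This registers a new `GET /indexes/{index_uid}/advanced-stats` route guarded by the same `stats.get` action as the plain stats route. A hedged sketch of calling it from Rust (assuming a local instance and the `reqwest` crate; the response body is whatever `Index::advanced_stats` serializes to):

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical call against a local Meilisearch with an index named "doggos".
    let body = reqwest::blocking::Client::new()
        .get("http://localhost:7700/indexes/doggos/advanced-stats")
        .bearer_auth("MASTER_KEY") // required when the instance is secured
        .send()?
        .error_for_status()?
        .text()?;
    println!("{body}");
    Ok(())
}
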
@ -196,7 +196,7 @@ pub async fn search_with_url_query(

// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
-    add_search_rules(&mut query, search_rules);
+    add_search_rules(&mut query.filter, search_rules);
}

let mut aggregate = SearchAggregator::from_query(&query, &req);
@ -235,7 +235,7 @@ pub async fn search_with_post(

// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
-    add_search_rules(&mut query, search_rules);
+    add_search_rules(&mut query.filter, search_rules);
}

let mut aggregate = SearchAggregator::from_query(&query, &req);

meilisearch/src/routes/indexes/similar.rs (new file, 171 lines)
@ -0,0 +1,171 @@
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::{
    InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId,
    InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarShowRankingScore,
    InvalidSimilarShowRankingScoreDetails,
};
use meilisearch_types::error::{ErrorCode as _, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;

use super::ActionPolicy;
use crate::analytics::{Analytics, SimilarAggregator};
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
    add_search_rules, perform_similar, SearchKind, SimilarQuery, SimilarResult,
    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(
        web::resource("")
            .route(web::get().to(SeqHandler(similar_get)))
            .route(web::post().to(SeqHandler(similar_post))),
    );
}

pub async fn similar_get(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebQueryParameter<SimilarQueryGet, DeserrQueryParamError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let query = params.0.try_into().map_err(|code: InvalidSimilarId| {
        ResponseError::from_msg(code.to_string(), code.error_code())
    })?;

    let mut aggregate = SimilarAggregator::from_query(&query, &req);

    debug!(parameters = ?query, "Similar get");

    let similar = similar(index_scheduler, index_uid, query).await;

    if let Ok(similar) = &similar {
        aggregate.succeed(similar);
    }
    analytics.get_similar(aggregate);

    let similar = similar?;

    debug!(returns = ?similar, "Similar get");
    Ok(HttpResponse::Ok().json(similar))
}

pub async fn similar_post(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebJson<SimilarQuery, DeserrJsonError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let query = params.into_inner();
    debug!(parameters = ?query, "Similar post");

    let mut aggregate = SimilarAggregator::from_query(&query, &req);

    let similar = similar(index_scheduler, index_uid, query).await;

    if let Ok(similar) = &similar {
        aggregate.succeed(similar);
    }
    analytics.post_similar(aggregate);

    let similar = similar?;

    debug!(returns = ?similar, "Similar post");
    Ok(HttpResponse::Ok().json(similar))
}

async fn similar(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: IndexUid,
    mut query: SimilarQuery,
) -> Result<SimilarResult, ResponseError> {
    let features = index_scheduler.features();

    features.check_vector("Using the similar API")?;

    // Tenant token search_rules.
    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
        add_search_rules(&mut query.filter, search_rules);
    }

    let index = index_scheduler.index(&index_uid)?;

    let (embedder_name, embedder) =
        SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;

    tokio::task::spawn_blocking(move || perform_similar(&index, query, embedder_name, embedder))
        .await?
}

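Note that the handler runs `perform_similar` under `tokio::task::spawn_blocking`: the vector search is CPU-bound and would otherwise stall the async executor. A minimal standalone sketch of that pattern (hypothetical workload, not meilisearch code; assumes the `tokio` crate with the `rt-multi-thread` and `macros` features):

#[tokio::main]
async fn main() {
    // Offload a CPU-heavy computation so the async runtime stays responsive.
    let sum = tokio::task::spawn_blocking(|| (0u64..10_000_000).sum::<u64>())
        .await
        .expect("the blocking task panicked");
    println!("{sum}");
}
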
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct SimilarQueryGet {
    #[deserr(error = DeserrQueryParamError<InvalidSimilarId>)]
    id: Param<String>,
    #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSimilarOffset>)]
    offset: Param<usize>,
    #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSimilarLimit>)]
    limit: Param<usize>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)]
    attributes_to_retrieve: Option<CS<String>>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)]
    filter: Option<String>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)]
    show_ranking_score: Param<bool>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
    show_ranking_score_details: Param<bool>,
    #[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
    pub embedder: Option<String>,
}

impl TryFrom<SimilarQueryGet> for SimilarQuery {
    type Error = InvalidSimilarId;

    fn try_from(
        SimilarQueryGet {
            id,
            offset,
            limit,
            attributes_to_retrieve,
            filter,
            show_ranking_score,
            show_ranking_score_details,
            embedder,
        }: SimilarQueryGet,
    ) -> Result<Self, Self::Error> {
        let filter = match filter {
            Some(f) => match serde_json::from_str(&f) {
                Ok(v) => Some(v),
                _ => Some(Value::String(f)),
            },
            None => None,
        };

        Ok(SimilarQuery {
            id: id.0.try_into()?,
            offset: offset.0,
            limit: limit.0,
            filter,
            embedder,
            attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
            show_ranking_score: show_ranking_score.0,
            show_ranking_score_details: show_ranking_score_details.0,
        })
    }
}
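The GET-to-JSON conversion above accepts the `filter` query parameter as either JSON or a raw filter expression: it first tries `serde_json::from_str`, and on failure keeps the text as a JSON string. A standalone sketch of that fallback:

use serde_json::Value;

/// Parse a filter the way SimilarQueryGet does: JSON if it parses,
/// otherwise the raw text wrapped in a JSON string.
fn parse_filter(f: &str) -> Value {
    serde_json::from_str(f).unwrap_or_else(|_| Value::String(f.to_string()))
}

fn main() {
    assert!(parse_filter(r#"["breed = labrador"]"#).is_array());
    assert!(parse_filter("breed = labrador").is_string());
}
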
@ -67,7 +67,7 @@ pub async fn multi_search_with_post(
// Apply search rules from tenant token
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
{
-    add_search_rules(&mut query, search_rules);
+    add_search_rules(&mut query.filter, search_rules);
}

let index = index_scheduler

@ -11,7 +11,7 @@ use indexmap::IndexMap;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
-use meilisearch_types::error::ResponseError;
+use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
@ -231,7 +231,7 @@ impl SearchKind {
Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio })
}

-fn embedder(
+pub(crate) fn embedder(
    index_scheduler: &index_scheduler::IndexScheduler,
    index: &Index,
    embedder_name: Option<&str>,
@ -417,6 +417,59 @@ impl SearchQueryWithIndex {
}
}

#[derive(Debug, Clone, PartialEq, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SimilarQuery {
    #[deserr(error = DeserrJsonError<InvalidSimilarId>)]
    pub id: ExternalDocumentId,
    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSimilarOffset>)]
    pub offset: usize,
    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSimilarLimit>)]
    pub limit: usize,
    #[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)]
    pub filter: Option<Value>,
    #[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
    pub embedder: Option<String>,
    #[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
    pub attributes_to_retrieve: Option<BTreeSet<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScore>, default)]
    pub show_ranking_score: bool,
    #[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
    pub show_ranking_score_details: bool,
}
|
||||
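A minimal request sketch exercising this struct, in the idiom of the test helpers added later in this diff (the document id and filter value are illustrative):

let (response, code) = index
    .similar_post(json!({
        "id": 287947,
        "limit": 5,
        "filter": "title = Glass",
        "showRankingScore": true
    }))
    .await;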
#[derive(Debug, Clone, PartialEq, Deserr)]
#[deserr(try_from(Value) = TryFrom::try_from -> InvalidSimilarId)]
pub struct ExternalDocumentId(String);

impl AsRef<str> for ExternalDocumentId {
    fn as_ref(&self) -> &str {
        &self.0
    }
}

impl ExternalDocumentId {
    pub fn into_inner(self) -> String {
        self.0
    }
}

impl TryFrom<String> for ExternalDocumentId {
    type Error = InvalidSimilarId;

    fn try_from(value: String) -> Result<Self, Self::Error> {
        serde_json::Value::String(value).try_into()
    }
}

impl TryFrom<Value> for ExternalDocumentId {
    type Error = InvalidSimilarId;

    fn try_from(value: Value) -> Result<Self, Self::Error> {
        Ok(Self(milli::documents::validate_document_id_value(value).map_err(|_| InvalidSimilarId)?))
    }
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
@ -424,6 +477,8 @@ pub enum MatchingStrategy {
    Last,
    /// All query words are mandatory
    All,
    /// Remove query words from the most frequent to the least
    Frequency,
}

impl Default for MatchingStrategy {
@ -437,6 +492,7 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
        match other {
            MatchingStrategy::Last => Self::Last,
            MatchingStrategy::All => Self::All,
            MatchingStrategy::Frequency => Self::Frequency,
        }
    }
}
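A search request opting into the new strategy, sketched in the test suite's idiom (query text illustrative):

let (response, code) = index
    .search_post(json!({
        "q": "Captain Marvel",
        // drops the most frequent query words first when no document matches them all
        "matchingStrategy": "frequency"
    }))
    .await;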
@ -538,6 +594,16 @@ impl fmt::Debug for SearchResult {
    }
}

#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SimilarResult {
    pub hits: Vec<SearchHit>,
    pub id: String,
    pub processing_time_ms: u128,
    #[serde(flatten)]
    pub hits_info: HitsInfo,
}

#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultWithIndex {
@ -570,8 +636,8 @@ pub struct FacetSearchResult {
}

/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
    query.filter = match (query.filter.take(), rules.filter) {
pub fn add_search_rules(filter: &mut Option<Value>, rules: IndexSearchRules) {
    *filter = match (filter.take(), rules.filter) {
        (None, rules_filter) => rules_filter,
        (filter, None) => filter,
        (Some(filter), Some(rules_filter)) => {
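Widening the signature from `&mut SearchQuery` to `&mut Option<Value>` lets the same rule-merging logic serve any query type that carries a filter; a sketch of the two call sites this enables (assuming both structs expose `filter: Option<Value>`, as shown elsewhere in this diff):

add_search_rules(&mut search_query.filter, rules.clone());
add_search_rules(&mut similar_query.filter, rules);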
@ -719,131 +785,52 @@ pub fn perform_search(
        SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
    };

    let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
    let SearchQuery {
        q,
        vector: _,
        hybrid: _,
        // already computed from prepare_search
        offset: _,
        limit,
        page,
        hits_per_page,
        attributes_to_retrieve,
        attributes_to_crop,
        crop_length,
        attributes_to_highlight,
        show_matches_position,
        show_ranking_score,
        show_ranking_score_details,
        filter: _,
        sort,
        facets,
        highlight_pre_tag,
        highlight_post_tag,
        crop_marker,
        matching_strategy: _,
        attributes_to_search_on: _,
    } = query;

    let displayed_ids = index
        .displayed_fields_ids(&rtxn)?
        .map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
        .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());

    let fids = |attrs: &BTreeSet<String>| {
        let mut ids = BTreeSet::new();
        for attr in attrs {
            if attr == "*" {
                ids = displayed_ids.clone();
                break;
            }

            if let Some(id) = fields_ids_map.id(attr) {
                ids.insert(id);
            }
        }
        ids
    let format = AttributesFormat {
        attributes_to_retrieve,
        attributes_to_highlight,
        attributes_to_crop,
        crop_length,
        crop_marker,
        highlight_pre_tag,
        highlight_post_tag,
        show_matches_position,
        sort,
        show_ranking_score,
        show_ranking_score_details,
    };

    // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
    // but these attributes must also be present
    // - in the fields_ids_map
    // - in the displayed attributes
    let to_retrieve_ids: BTreeSet<_> = query
        .attributes_to_retrieve
        .as_ref()
        .map(fids)
        .unwrap_or_else(|| displayed_ids.clone())
        .intersection(&displayed_ids)
        .cloned()
        .collect();

    let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default();

    let attr_to_crop = query.attributes_to_crop.unwrap_or_default();

    // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted`
    // These attributes are:
    // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
    // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
    // But these attributes must also be present in displayed attributes
    let formatted_options = compute_formatted_options(
        &attr_to_highlight,
        &attr_to_crop,
        query.crop_length,
        &to_retrieve_ids,
        &fields_ids_map,
        &displayed_ids,
    );

    let mut tokenizer_builder = TokenizerBuilder::default();
    tokenizer_builder.create_char_map(true);

    let script_lang_map = index.script_language(&rtxn)?;
    if !script_lang_map.is_empty() {
        tokenizer_builder.allow_list(&script_lang_map);
    }

    let separators = index.allowed_separators(&rtxn)?;
    let separators: Option<Vec<_>> =
        separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref separators) = separators {
        tokenizer_builder.separators(separators);
    }

    let dictionary = index.dictionary(&rtxn)?;
    let dictionary: Option<Vec<_>> =
        dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref dictionary) = dictionary {
        tokenizer_builder.words_dict(dictionary);
    }

    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
    formatter_builder.crop_marker(query.crop_marker);
    formatter_builder.highlight_prefix(query.highlight_pre_tag);
    formatter_builder.highlight_suffix(query.highlight_post_tag);

    let mut documents = Vec::new();
    let documents_iter = index.documents(&rtxn, documents_ids)?;

    for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
        // First generate a document with all the displayed fields
        let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;

        // select the attributes to retrieve
        let attributes_to_retrieve = to_retrieve_ids
            .iter()
            .map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
        let mut document =
            permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);

        let (matches_position, formatted) = format_fields(
            &displayed_document,
            &fields_ids_map,
            &formatter_builder,
            &formatted_options,
            query.show_matches_position,
            &displayed_ids,
        )?;

        if let Some(sort) = query.sort.as_ref() {
            insert_geo_distance(sort, &mut document);
        }

        let ranking_score =
            query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
        let ranking_score_details =
            query.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));

        let hit = SearchHit {
            document,
            formatted,
            matches_position,
            ranking_score_details,
            ranking_score,
        };
        documents.push(hit);
    }
    let documents =
        make_hits(index, &rtxn, format, matching_words, documents_ids, document_scores)?;

    let number_of_hits = min(candidates.len() as usize, max_total_hits);
    let hits_info = if is_finite_pagination {
        let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
        let hits_per_page = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
        // If hit_per_page is 0, then pages can't be computed and so we respond 0.
        let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
            .checked_div(hits_per_page)
@ -851,15 +838,15 @@ pub fn perform_search(

        HitsInfo::Pagination {
            hits_per_page,
            page: query.page.unwrap_or(1),
            page: page.unwrap_or(1),
            total_pages,
            total_hits: number_of_hits,
        }
    } else {
        HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
        HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits }
    };

    let (facet_distribution, facet_stats) = match query.facets {
    let (facet_distribution, facet_stats) = match facets {
        Some(ref fields) => {
            let mut facet_distribution = index.facets_distribution(&rtxn);

@ -896,7 +883,7 @@ pub fn perform_search(
    let result = SearchResult {
        hits: documents,
        hits_info,
        query: query.q.unwrap_or_default(),
        query: q.unwrap_or_default(),
        processing_time_ms: before_search.elapsed().as_millis(),
        facet_distribution,
        facet_stats,
@ -907,6 +894,130 @@ pub fn perform_search(
    Ok(result)
}

struct AttributesFormat {
    attributes_to_retrieve: Option<BTreeSet<String>>,
    attributes_to_highlight: Option<HashSet<String>>,
    attributes_to_crop: Option<Vec<String>>,
    crop_length: usize,
    crop_marker: String,
    highlight_pre_tag: String,
    highlight_post_tag: String,
    show_matches_position: bool,
    sort: Option<Vec<String>>,
    show_ranking_score: bool,
    show_ranking_score_details: bool,
}

fn make_hits(
    index: &Index,
    rtxn: &RoTxn<'_>,
    format: AttributesFormat,
    matching_words: milli::MatchingWords,
    documents_ids: Vec<u32>,
    document_scores: Vec<Vec<ScoreDetails>>,
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
    let fields_ids_map = index.fields_ids_map(rtxn).unwrap();
    let displayed_ids = index
        .displayed_fields_ids(rtxn)?
        .map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
        .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
    let fids = |attrs: &BTreeSet<String>| {
        let mut ids = BTreeSet::new();
        for attr in attrs {
            if attr == "*" {
                ids.clone_from(&displayed_ids);
                break;
            }

            if let Some(id) = fields_ids_map.id(attr) {
                ids.insert(id);
            }
        }
        ids
    };
    let to_retrieve_ids: BTreeSet<_> = format
        .attributes_to_retrieve
        .as_ref()
        .map(fids)
        .unwrap_or_else(|| displayed_ids.clone())
        .intersection(&displayed_ids)
        .cloned()
        .collect();
    let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default();
    let attr_to_crop = format.attributes_to_crop.unwrap_or_default();
    let formatted_options = compute_formatted_options(
        &attr_to_highlight,
        &attr_to_crop,
        format.crop_length,
        &to_retrieve_ids,
        &fields_ids_map,
        &displayed_ids,
    );
    let mut tokenizer_builder = TokenizerBuilder::default();
    tokenizer_builder.create_char_map(true);
    let script_lang_map = index.script_language(rtxn)?;
    if !script_lang_map.is_empty() {
        tokenizer_builder.allow_list(&script_lang_map);
    }
    let separators = index.allowed_separators(rtxn)?;
    let separators: Option<Vec<_>> =
        separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref separators) = separators {
        tokenizer_builder.separators(separators);
    }
    let dictionary = index.dictionary(rtxn)?;
    let dictionary: Option<Vec<_>> =
        dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref dictionary) = dictionary {
        tokenizer_builder.words_dict(dictionary);
    }
    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
    formatter_builder.crop_marker(format.crop_marker);
    formatter_builder.highlight_prefix(format.highlight_pre_tag);
    formatter_builder.highlight_suffix(format.highlight_post_tag);
    let mut documents = Vec::new();
    let documents_iter = index.documents(rtxn, documents_ids)?;
    for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
        // First generate a document with all the displayed fields
        let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;

        // select the attributes to retrieve
        let attributes_to_retrieve = to_retrieve_ids
            .iter()
            .map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
        let mut document =
            permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);

        let (matches_position, formatted) = format_fields(
            &displayed_document,
            &fields_ids_map,
            &formatter_builder,
            &formatted_options,
            format.show_matches_position,
            &displayed_ids,
        )?;

        if let Some(sort) = format.sort.as_ref() {
            insert_geo_distance(sort, &mut document);
        }

        let ranking_score =
            format.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
        let ranking_score_details =
            format.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));

        let hit = SearchHit {
            document,
            formatted,
            matches_position,
            ranking_score_details,
            ranking_score,
        };
        documents.push(hit);
    }
    Ok(documents)
}

pub fn perform_facet_search(
    index: &Index,
    search_query: SearchQuery,
@ -941,6 +1052,95 @@ pub fn perform_facet_search(
    })
}

pub fn perform_similar(
    index: &Index,
    query: SimilarQuery,
    embedder_name: String,
    embedder: Arc<Embedder>,
) -> Result<SimilarResult, ResponseError> {
    let before_search = Instant::now();
    let rtxn = index.read_txn()?;

    let SimilarQuery {
        id,
        offset,
        limit,
        filter: _,
        embedder: _,
        attributes_to_retrieve,
        show_ranking_score,
        show_ranking_score_details,
    } = query;

    // using let-else rather than `?` so that the borrow checker identifies we're always returning here,
    // preventing a use-after-move
    let Some(internal_id) = index.external_documents_ids().get(&rtxn, &id)? else {
        return Err(ResponseError::from_msg(
            MeilisearchHttpError::DocumentNotFound(id.into_inner()).to_string(),
            Code::NotFoundSimilarId,
        ));
    };

    let mut similar =
        milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);

    if let Some(ref filter) = query.filter {
        if let Some(facets) = parse_filter(filter)
            // inject InvalidSimilarFilter code
            .map_err(|e| ResponseError::from_msg(e.to_string(), Code::InvalidSimilarFilter))?
        {
            similar.filter(facets);
        }
    }

    let milli::SearchResult {
        documents_ids,
        matching_words: _,
        candidates,
        document_scores,
        degraded: _,
        used_negative_operator: _,
    } = similar.execute().map_err(|err| match err {
        milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
            ResponseError::from_msg(err.to_string(), Code::InvalidSimilarFilter)
        }
        err => err.into(),
    })?;

    let format = AttributesFormat {
        attributes_to_retrieve,
        attributes_to_highlight: None,
        attributes_to_crop: None,
        crop_length: DEFAULT_CROP_LENGTH(),
        crop_marker: DEFAULT_CROP_MARKER(),
        highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
        highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
        show_matches_position: false,
        sort: None,
        show_ranking_score,
        show_ranking_score_details,
    };

    let hits = make_hits(index, &rtxn, format, Default::default(), documents_ids, document_scores)?;

    let max_total_hits = index
        .pagination_max_total_hits(&rtxn)
        .map_err(milli::Error::from)?
        .map(|x| x as usize)
        .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

    let number_of_hits = min(candidates.len() as usize, max_total_hits);
    let hits_info = HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits };

    let result = SimilarResult {
        hits,
        hits_info,
        id: id.into_inner(),
        processing_time_ms: before_search.elapsed().as_millis(),
    };
    Ok(result)
}

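A usage sketch for `perform_similar` (error handling elided; `index` and `embedder` are assumed to be in scope, and the document id is illustrative):

let query = SimilarQuery {
    id: "287947".to_string().try_into()?, // ExternalDocumentId via TryFrom<String>
    offset: 0,
    limit: 20,
    filter: None,
    embedder: None,
    attributes_to_retrieve: None,
    show_ranking_score: true,
    show_ranking_score_details: false,
};
let result = perform_similar(&index, query, "manual".to_string(), embedder)?;
// Similar results always paginate by offset/limit, never by page/hitsPerPage.
assert!(matches!(result.hits_info, HitsInfo::OffsetLimit { .. }));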
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
    lazy_static::lazy_static! {
        static ref GEO_REGEX: Regex =
@ -85,8 +85,13 @@ impl SearchQueue {
            },

            search_request = receive_new_searches.recv() => {
                // this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web
                let search_request = search_request.unwrap();
                let search_request = match search_request {
                    Some(search_request) => search_request,
                    // This should never happen while actix-web is running, but it's not a reason to crash
                    // and it can generate a lot of noise in the tests.
                    None => continue,
                };

                if searches_running < usize::from(parallelism) && queue.is_empty() {
                    searches_running += 1;
                    // if the search requests die it's not a hard error on our side
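The same channel semantics, as a standalone sketch: tokio's `mpsc::Receiver::recv` yields `None` once every sender has been dropped, so the scheduling loop can skip the iteration instead of unwrapping:

// Inside the scheduling loop, after the select! arm resolves:
let Some(search_request) = search_request else { continue };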
@ -380,6 +380,43 @@ impl Index<'_> {
        self.service.get(url).await
    }

    /// Performs both GET and POST similar queries
    pub async fn similar(
        &self,
        query: Value,
        test: impl Fn(Value, StatusCode) + UnwindSafe + Clone,
    ) {
        let post = self.similar_post(query.clone()).await;

        let query = yaup::to_string(&query).unwrap();
        let get = self.similar_get(&query).await;

        insta::allow_duplicates! {
            let (response, code) = post;
            let t = test.clone();
            if let Err(e) = catch_unwind(move || t(response, code)) {
                eprintln!("Error with post search");
                resume_unwind(e);
            }

            let (response, code) = get;
            if let Err(e) = catch_unwind(move || test(response, code)) {
                eprintln!("Error with get search");
                resume_unwind(e);
            }
        }
    }

    pub async fn similar_post(&self, query: Value) -> (Value, StatusCode) {
        let url = format!("/indexes/{}/similar", urlencode(self.uid.as_ref()));
        self.service.post_encoded(url, query, self.encoder).await
    }

    pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) {
        let url = format!("/indexes/{}/similar?{}", urlencode(self.uid.as_ref()), query);
        self.service.get(url).await
    }

    pub async fn facet_search(&self, query: Value) -> (Value, StatusCode) {
        let url = format!("/indexes/{}/facet-search", urlencode(self.uid.as_ref()));
        self.service.post_encoded(url, query, self.encoder).await
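Usage sketch for the combined helper, asserting identical behaviour over both verbs (body values illustrative):

index
    .similar(json!({"id": 143, "limit": 1}), |response, code| {
        assert_eq!(code, 200, "{}", response);
        assert_eq!(response["hits"].as_array().unwrap().len(), 1);
    })
    .await;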
@ -1859,8 +1859,7 @@ async fn import_dump_v6_containing_experimental_features() {
    {
      "vectorStore": false,
      "metrics": false,
      "logsRoute": false,
      "exportPuffinReports": false
      "logsRoute": false
    }
    "###);

@ -20,8 +20,7 @@ async fn experimental_features() {
    {
      "vectorStore": false,
      "metrics": false,
      "logsRoute": false,
      "exportPuffinReports": false
      "logsRoute": false
    }
    "###);

@ -32,8 +31,7 @@ async fn experimental_features() {
    {
      "vectorStore": true,
      "metrics": false,
      "logsRoute": false,
      "exportPuffinReports": false
      "logsRoute": false
    }
    "###);

@ -44,8 +42,7 @@ async fn experimental_features() {
    {
      "vectorStore": true,
      "metrics": false,
      "logsRoute": false,
      "exportPuffinReports": false
      "logsRoute": false
    }
    "###);

@ -57,8 +54,7 @@ async fn experimental_features() {
    {
      "vectorStore": true,
      "metrics": false,
      "logsRoute": false,
      "exportPuffinReports": false
      "logsRoute": false
    }
    "###);

@ -70,8 +66,7 @@ async fn experimental_features() {
    {
      "vectorStore": true,
      "metrics": false,
      "logsRoute": false,
      "exportPuffinReports": false
      "logsRoute": false
    }
    "###);
}
@ -90,8 +85,7 @@ async fn experimental_feature_metrics() {
    {
      "vectorStore": false,
      "metrics": true,
      "logsRoute": false,
      "exportPuffinReports": false
      "logsRoute": false
    }
    "###);

@ -146,7 +140,7 @@ async fn errors() {
    meili_snap::snapshot!(code, @"400 Bad Request");
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
      "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`",
      "code": "bad_request",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#bad_request"
@ -8,6 +8,7 @@ mod index;
mod logs;
mod search;
mod settings;
mod similar;
mod snapshot;
mod stats;
mod swap_indexes;
@ -505,7 +505,7 @@ async fn search_bad_matching_strategy() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
      "message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`",
      "message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`, `frequency`",
      "code": "invalid_search_matching_strategy",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@ -527,7 +527,7 @@ async fn search_bad_matching_strategy() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
      "message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`",
      "message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`, `frequency`",
      "code": "invalid_search_matching_strategy",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@ -117,3 +117,69 @@ async fn geo_bounding_box_with_string_and_number() {
    )
    .await;
}

#[actix_rt::test]
async fn bug_4640() {
    // https://github.com/meilisearch/meilisearch/issues/4640
    let server = Server::new().await;
    let index = server.index("test");

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, None).await;
    index.update_settings_filterable_attributes(json!(["_geo"])).await;
    let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
    index.wait_task(ret.uid()).await;

    // Sort the document with the second one first
    index
        .search(
            json!({
                "sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
            }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
                {
                  "hits": [
                    {
                      "id": 2,
                      "name": "La Bella Italia",
                      "address": "456 Elm Street, Townsville",
                      "type": "Italian",
                      "rating": 9,
                      "_geo": {
                        "lat": "45.4777599",
                        "lng": "9.1967508"
                      }
                    },
                    {
                      "id": 1,
                      "name": "Taco Truck",
                      "address": "444 Salsa Street, Burritoville",
                      "type": "Mexican",
                      "rating": 9,
                      "_geo": {
                        "lat": 34.0522,
                        "lng": -118.2437
                      },
                      "_geoDistance": 9714063
                    },
                    {
                      "id": 3,
                      "name": "Crêpe Truck",
                      "address": "2 Billig Avenue, Rouenville",
                      "type": "French",
                      "rating": 10
                    }
                  ],
                  "query": "",
                  "processingTimeMs": "[time]",
                  "limit": 20,
                  "offset": 0,
                  "estimatedTotalHits": 3
                }
                "###);
            },
        )
        .await;
}
@ -5,7 +5,10 @@ use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;

async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
async fn index_with_documents_user_provided<'a>(
    server: &'a Server,
    documents: &Value,
) -> Index<'a> {
    let index = server.index("test");

    let (response, code) = server.set_features(json!({"vectorStore": true})).await;
@ -15,8 +18,7 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
    {
      "vectorStore": true,
      "metrics": false,
      "logsRoute": false,
      "exportPuffinReports": false
      "logsRoute": false
    }
    "###);

@ -34,7 +36,38 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
    index
}

static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
    let index = server.index("test");

    let (response, code) = server.set_features(json!({"vectorStore": true})).await;

    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "vectorStore": true,
      "metrics": false,
      "logsRoute": false
    }
    "###);

    let (response, code) = index
        .update_settings(json!({ "embedders": {"default": {
            "source": "huggingFace",
            "model": "sentence-transformers/all-MiniLM-L6-v2",
            "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
            "documentTemplate": "{{doc.title}}, {{doc.desc}}"
        }}} ))
        .await;
    assert_eq!(202, code, "{:?}", response);
    index.wait_task(response.uid()).await;

    let (response, code) = index.add_documents(documents.clone(), None).await;
    assert_eq!(202, code, "{:?}", response);
    index.wait_task(response.uid()).await;
    index
}

static SIMPLE_SEARCH_DOCUMENTS_VEC: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "title": "Shazam!",
@ -56,7 +89,7 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
        }])
});

static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
static SINGLE_DOCUMENT_VEC: Lazy<Value> = Lazy::new(|| {
    json!([{
        "title": "Shazam!",
        "desc": "a Captain Marvel ersatz",
@ -65,10 +98,29 @@ static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
        }])
});

static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "title": "Shazam!",
            "desc": "a Captain Marvel ersatz",
            "id": "1",
        },
        {
            "title": "Captain Planet",
            "desc": "He's not part of the Marvel Cinematic Universe",
            "id": "2",
        },
        {
            "title": "Captain Marvel",
            "desc": "a Shazam ersatz",
            "id": "3",
        }])
});

#[actix_rt::test]
async fn simple_search() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
    let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;

    let (response, code) = index
        .search_post(
@ -85,8 +137,8 @@ async fn simple_search() {
        )
        .await;
    snapshot!(code, @"200 OK");
    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
    snapshot!(response["semanticHitCount"], @"1");
    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
    snapshot!(response["semanticHitCount"], @"2");

    let (response, code) = index
        .search_post(
@ -98,10 +150,59 @@ async fn simple_search() {
    snapshot!(response["semanticHitCount"], @"3");
}

#[actix_rt::test]
async fn simple_search_hf() {
    let server = Server::new().await;
    let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    let (response, code) =
        index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await;
    snapshot!(code, @"200 OK");
    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
    snapshot!(response["semanticHitCount"], @"0");

    let (response, code) = index
        .search_post(
            // disable ranking score as the vectors between architectures are not equal
            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}),
        )
        .await;
    snapshot!(code, @"200 OK");
    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
    snapshot!(response["semanticHitCount"], @"1");

    let (response, code) = index
        .search_post(
            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}),
        )
        .await;
    snapshot!(code, @"200 OK");
    snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###);
    snapshot!(response["semanticHitCount"], @"3");

    let (response, code) = index
        .search_post(
            json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
        )
        .await;
    snapshot!(code, @"200 OK");
    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}]"###);
    snapshot!(response["semanticHitCount"], @"3");

    let (response, code) = index
        .search_post(
            json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
        )
        .await;
    snapshot!(code, @"200 OK");
    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###);
    snapshot!(response["semanticHitCount"], @"3");
}

#[actix_rt::test]
async fn distribution_shift() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
    let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;

    let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}});
    let (response, code) = index.search_post(search.clone()).await;
@ -133,7 +234,7 @@ async fn distribution_shift() {
#[actix_rt::test]
async fn highlighter() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
    let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;

    let (response, code) = index
        .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
@ -184,7 +285,7 @@ async fn highlighter() {
#[actix_rt::test]
async fn invalid_semantic_ratio() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
    let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;

    let (response, code) = index
        .search_post(
@ -256,7 +357,7 @@ async fn invalid_semantic_ratio() {
#[actix_rt::test]
async fn single_document() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;
    let index = index_with_documents_user_provided(&server, &SINGLE_DOCUMENT_VEC).await;

    let (response, code) = index
        .search_post(
@ -272,7 +373,7 @@ async fn single_document() {
#[actix_rt::test]
async fn query_combination() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
    let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;

    // search without query and vector, but with hybrid => still placeholder
    let (response, code) = index
@ -331,7 +432,7 @@ async fn query_combination() {
        .await;

    snapshot!(code, @"200 OK");
    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.8848484848484849}]"###);
    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9242424242424242}]"###);
    snapshot!(response["semanticHitCount"], @"null");

    // query + vector, no hybrid keyword =>
@ -374,6 +475,6 @@ async fn query_combination() {
        .await;

    snapshot!(code, @"200 OK");
    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848}]"###);
    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9242424242424242}]"###);
    snapshot!(response["semanticHitCount"], @"0");
}
128
meilisearch/tests/search/matching_strategy.rs
Normal file
@ -0,0 +1,128 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;

use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;

async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
    let index = server.index("test");

    index.add_documents(documents.clone(), None).await;
    index.wait_task(0).await;
    index
}

static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "title": "Shazam!",
            "id": "1",
        },
        {
            "title": "Captain Planet",
            "id": "2",
        },
        {
            "title": "Captain Marvel",
            "id": "3",
        },
        {
            "title": "a Captain Marvel ersatz",
            "id": "4"
        },
        {
            "title": "He's not part of the Marvel Cinematic Universe",
            "id": "5"
        },
        {
            "title": "a Shazam ersatz, but better than Captain Planet",
            "id": "6"
        },
        {
            "title": "Capitain CAAAAAVEEERNE!!!!",
            "id": "7"
        }
    ])
});

#[actix_rt::test]
async fn simple_search() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    index
        .search(json!({"q": "Captain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"2"},{"id":"6"},{"id":"7"}]"###);
        })
        .await;

    index
        .search(json!({"q": "Captain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
        })
        .await;

    index
        .search(json!({"q": "Captain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
        })
        .await;
}

#[actix_rt::test]
async fn search_with_typo() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    index
        .search(json!({"q": "Capitain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"7"},{"id":"2"},{"id":"6"}]"###);
        })
        .await;

    index
        .search(json!({"q": "Capitain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
        })
        .await;

    index
        .search(json!({"q": "Capitain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
        })
        .await;
}

#[actix_rt::test]
async fn search_with_unknown_word() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    index
        .search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"], @r###"[{"id":"2"},{"id":"3"},{"id":"4"},{"id":"6"},{"id":"7"}]"###);
        })
        .await;

    index
        .search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"], @"[]");
        })
        .await;

    index
        .search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
        })
        .await;
}
@ -7,6 +7,7 @@ mod facet_search;
mod formatted;
mod geo;
mod hybrid;
mod matching_strategy;
mod multi;
mod pagination;
mod restrict_searchable;
@ -680,6 +681,26 @@ async fn search_facet_distribution() {
        },
    )
    .await;

    index.update_settings(json!({"filterableAttributes": ["doggos.name"]})).await;
    index.wait_task(5).await;

    index
        .search(
            json!({
                "facets": ["doggos.name"]
            }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                let dist = response["facetDistribution"].as_object().unwrap();
                assert_eq!(dist.len(), 1);
                assert_eq!(
                    dist["doggos.name"],
                    json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
                );
            },
        )
        .await;
}

#[actix_rt::test]
@ -895,9 +916,9 @@ async fn test_score_details() {
            "id": "166428",
            "_vectors": {
              "manual": [
                -100,
                231,
                32
                -100.0,
                231.0,
                32.0
              ]
            },
            "_rankingScoreDetails": {
@ -921,7 +942,7 @@
                "order": 3,
                "attributeRankingOrderScore": 1.0,
                "queryWordDistanceScore": 0.8095238095238095,
                "score": 0.9727891156462584
                "score": 0.8095238095238095
              },
              "exactness": {
                "order": 4,
@ -1096,9 +1117,9 @@ async fn experimental_feature_vector_store() {
            "id": "287947",
            "_vectors": {
              "manual": [
                1,
                2,
                3
                1.0,
                2.0,
                3.0
              ]
            },
            "_rankingScore": 1.0
@ -1108,9 +1129,9 @@ async fn experimental_feature_vector_store() {
            "id": "299537",
            "_vectors": {
              "manual": [
                1,
                2,
                54
                1.0,
                2.0,
                54.0
              ]
            },
            "_rankingScore": 0.9129111766815186
@ -1120,9 +1141,9 @@ async fn experimental_feature_vector_store() {
            "id": "450465",
            "_vectors": {
              "manual": [
                -100,
                340,
                90
                -100.0,
                340.0,
                90.0
              ]
            },
            "_rankingScore": 0.8106412887573242
@ -1132,9 +1153,9 @@ async fn experimental_feature_vector_store() {
            "id": "166428",
            "_vectors": {
              "manual": [
                -100,
                231,
                32
                -100.0,
                231.0,
                32.0
              ]
            },
            "_rankingScore": 0.7412010431289673
@ -1144,9 +1165,9 @@ async fn experimental_feature_vector_store() {
            "id": "522681",
            "_vectors": {
              "manual": [
                10,
                -23,
                32
                10.0,
                -23.0,
                32.0
              ]
            },
            "_rankingScore": 0.6972063183784485
@ -1405,9 +1426,9 @@ async fn simple_search_with_strange_synonyms() {
            "id": "166428",
            "_vectors": {
              "manual": [
                -100,
                231,
                32
                -100.0,
                231.0,
                32.0
              ]
            }
          }
@ -1426,9 +1447,9 @@ async fn simple_search_with_strange_synonyms() {
            "id": "166428",
            "_vectors": {
              "manual": [
                -100,
                231,
                32
                -100.0,
                231.0,
                32.0
              ]
            }
          }
@ -1447,9 +1468,9 @@ async fn simple_search_with_strange_synonyms() {
            "id": "166428",
            "_vectors": {
              "manual": [
                -100,
                231,
                32
                -100.0,
                231.0,
                32.0
              ]
            }
          }
@ -75,9 +75,9 @@ async fn simple_search_single_index() {
            "id": "450465",
            "_vectors": {
              "manual": [
                -100,
                340,
                90
                -100.0,
                340.0,
                90.0
              ]
            }
          }
@ -96,9 +96,9 @@ async fn simple_search_single_index() {
            "id": "299537",
            "_vectors": {
              "manual": [
                1,
                2,
                54
                1.0,
                2.0,
                54.0
              ]
            }
          }
@ -194,9 +194,9 @@ async fn simple_search_two_indexes() {
            "id": "450465",
            "_vectors": {
              "manual": [
                -100,
                340,
                90
                -100.0,
                340.0,
                90.0
              ]
            }
          }
@ -227,9 +227,9 @@ async fn simple_search_two_indexes() {
            "cattos": "pésti",
            "_vectors": {
              "manual": [
                1,
                2,
                3
                1.0,
                2.0,
                3.0
              ]
            }
          },
@ -249,9 +249,9 @@ async fn simple_search_two_indexes() {
            ],
            "_vectors": {
              "manual": [
                1,
                2,
                54
                1.0,
                2.0,
                54.0
              ]
            }
          }
@ -285,10 +285,10 @@ async fn attributes_ranking_rule_order() {
        @r###"
        [
          {
            "id": "2"
            "id": "1"
          },
          {
            "id": "1"
            "id": "2"
          }
        ]
        "###
@ -98,8 +98,7 @@ async fn secrets_are_hidden_in_settings() {
    {
      "vectorStore": true,
      "metrics": false,
      "logsRoute": false,
      "exportPuffinReports": false
      "logsRoute": false
    }
    "###);

696
meilisearch/tests/similar/errors.rs
Normal file
@ -0,0 +1,696 @@
|
||||
use meili_snap::*;
|
||||
|
||||
use super::DOCUMENTS;
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
});
|
||||
|
||||
index
|
||||
.similar(json!({"id": 287947}), |response, code| {
|
||||
assert_eq!(code, 404);
|
||||
assert_eq!(response, expected_response);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_unexisting_parameter() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 287947, "marin": "hello"}), |response, code| {
|
||||
assert_eq!(code, 400, "{}", response);
|
||||
assert_eq!(response["code"], "bad_request");
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_feature_not_enabled() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = index.similar_post(json!({"id": 287947})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Using the similar API requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_bad_id() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
"filterableAttributes": ["title"]}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({"id": ["doggo"]})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
|
||||
"code": "invalid_similar_id",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_invalid_id() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
"filterableAttributes": ["title"]}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({"id": "http://invalid-docid/"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
|
||||
"code": "invalid_similar_id",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_not_found_id() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
"filterableAttributes": ["title"]}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Document `definitely-doesnt-exist` not found.",
|
||||
"code": "not_found_similar_id",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#not_found_similar_id"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_bad_offset() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
"filterableAttributes": ["title"]}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_similar_offset",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_offset"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.similar_get("id=287947&offset=doggo").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `offset`: could not parse `doggo` as a positive integer",
|
||||
"code": "invalid_similar_offset",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_offset"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_bad_limit() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
"filterableAttributes": ["title"]}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_similar_limit",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_limit"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.similar_get("id=287946&limit=doggo").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `limit`: could not parse `doggo` as a positive integer",
|
||||
"code": "invalid_similar_limit",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_limit"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn similar_bad_filter() {
|
||||
// Since a filter is deserialized as a json Value it will never fail to deserialize.
|
||||
// Thus the error message is not generated by deserr but written by us.
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
"filterableAttributes": ["title"]}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
snapshot!(code, @"202 Accepted");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
|
||||
"code": "invalid_similar_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||
}
|
||||
"###);
|
||||
// Can't make the `filter` fail with a get search since it'll accept anything as a strings.
|
||||
}
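
The comment in `similar_bad_filter` explains that the filter reaches the route as an already-deserialized JSON value, so the type check is hand-written rather than generated by deserr. A standalone sketch of that kind of manual check (illustrative only, not Meilisearch's actual internals; only the error wording is taken from the snapshot above):

use serde_json::Value;

// Accept a string or an array as a filter expression, reject everything
// else with a message shaped like the one snapshotted above.
fn check_filter_type(filter: &Value) -> Result<(), String> {
    match filter {
        Value::String(_) | Value::Array(_) => Ok(()),
        other => Err(format!(
            "Invalid syntax for the filter parameter: `expected String, Array, found: {other}`."
        )),
    }
}

fn main() {
    assert!(check_filter_type(&Value::String("title = Glass".into())).is_ok());
    assert!(check_filter_type(&Value::Bool(true)).is_err());
}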

#[actix_rt::test]
async fn filter_invalid_syntax_object() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn filter_invalid_syntax_array() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn filter_invalid_syntax_string() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(
            json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}),
            |response, code| {
                assert_eq!(response, expected_response);
                assert_eq!(code, 400);
            },
        )
        .await;
}

#[actix_rt::test]
async fn filter_invalid_attribute_array() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn filter_invalid_attribute_string() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn filter_reserved_geo_attribute_array() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn filter_reserved_geo_attribute_string() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn filter_reserved_attribute_array() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn filter_reserved_attribute_string() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn filter_reserved_geo_point_array() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn filter_reserved_geo_point_string() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let expected_response = json!({
        "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

meilisearch/tests/similar/mod.rs (new file, 373 lines)
@@ -0,0 +1,373 @@
mod errors;

use meili_snap::{json_string, snapshot};
use once_cell::sync::Lazy;

use crate::common::{Server, Value};
use crate::json;

static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "title": "Shazam!",
            "release_year": 2019,
            "id": "287947",
            // Three semantic properties:
            // 1. magic, anything that reminds you of magic
            // 2. authority, anything that inspires command
            // 3. horror, anything that inspires fear or dread
            "_vectors": { "manual": [0.8, 0.4, -0.5]},
        },
        {
            "title": "Captain Marvel",
            "release_year": 2019,
            "id": "299537",
            "_vectors": { "manual": [0.6, 0.8, -0.2] },
        },
        {
            "title": "Escape Room",
            "release_year": 2019,
            "id": "522681",
            "_vectors": { "manual": [0.1, 0.6, 0.8] },
        },
        {
            "title": "How to Train Your Dragon: The Hidden World",
            "release_year": 2019,
            "id": "166428",
            "_vectors": { "manual": [0.7, 0.7, -0.4] },
        },
        {
            "title": "All Quiet on the Western Front",
            "release_year": 1930,
            "id": "143",
            "_vectors": { "manual": [-0.5, 0.3, 0.85] },
        }
    ])
});

#[actix_rt::test]
async fn basic() {
    let server = Server::new().await;
    let index = server.index("test");
    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
    snapshot!(code, @"200 OK");
    snapshot!(value, @r###"
    {
      "vectorStore": true,
      "metrics": false,
      "logsRoute": false
    }
    "###);

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    index
        .similar(json!({"id": 143}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "title": "Escape Room",
                "release_year": 2019,
                "id": "522681",
                "_vectors": {
                  "manual": [
                    0.1,
                    0.6,
                    0.8
                  ]
                }
              },
              {
                "title": "Captain Marvel",
                "release_year": 2019,
                "id": "299537",
                "_vectors": {
                  "manual": [
                    0.6,
                    0.8,
                    -0.2
                  ]
                }
              },
              {
                "title": "How to Train Your Dragon: The Hidden World",
                "release_year": 2019,
                "id": "166428",
                "_vectors": {
                  "manual": [
                    0.7,
                    0.7,
                    -0.4
                  ]
                }
              },
              {
                "title": "Shazam!",
                "release_year": 2019,
                "id": "287947",
                "_vectors": {
                  "manual": [
                    0.8,
                    0.4,
                    -0.5
                  ]
                }
              }
            ]
            "###);
        })
        .await;

    index
        .similar(json!({"id": "299537"}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "title": "How to Train Your Dragon: The Hidden World",
                "release_year": 2019,
                "id": "166428",
                "_vectors": {
                  "manual": [
                    0.7,
                    0.7,
                    -0.4
                  ]
                }
              },
              {
                "title": "Shazam!",
                "release_year": 2019,
                "id": "287947",
                "_vectors": {
                  "manual": [
                    0.8,
                    0.4,
                    -0.5
                  ]
                }
              },
              {
                "title": "Escape Room",
                "release_year": 2019,
                "id": "522681",
                "_vectors": {
                  "manual": [
                    0.1,
                    0.6,
                    0.8
                  ]
                }
              },
              {
                "title": "All Quiet on the Western Front",
                "release_year": 1930,
                "id": "143",
                "_vectors": {
                  "manual": [
                    -0.5,
                    0.3,
                    0.85
                  ]
                }
              }
            ]
            "###);
        })
        .await;
}

#[actix_rt::test]
async fn filter() {
    let server = Server::new().await;
    let index = server.index("test");
    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
    snapshot!(code, @"200 OK");
    snapshot!(value, @r###"
    {
      "vectorStore": true,
      "metrics": false,
      "logsRoute": false
    }
    "###);

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title", "release_year"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    index
        .similar(json!({"id": 522681, "filter": "release_year = 2019"}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "title": "Captain Marvel",
                "release_year": 2019,
                "id": "299537",
                "_vectors": {
                  "manual": [
                    0.6,
                    0.8,
                    -0.2
                  ]
                }
              },
              {
                "title": "How to Train Your Dragon: The Hidden World",
                "release_year": 2019,
                "id": "166428",
                "_vectors": {
                  "manual": [
                    0.7,
                    0.7,
                    -0.4
                  ]
                }
              },
              {
                "title": "Shazam!",
                "release_year": 2019,
                "id": "287947",
                "_vectors": {
                  "manual": [
                    0.8,
                    0.4,
                    -0.5
                  ]
                }
              }
            ]
            "###);
        })
        .await;

    index
        .similar(json!({"id": 522681, "filter": "release_year < 2000"}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "title": "All Quiet on the Western Front",
                "release_year": 1930,
                "id": "143",
                "_vectors": {
                  "manual": [
                    -0.5,
                    0.3,
                    0.85
                  ]
                }
              }
            ]
            "###);
        })
        .await;
}

#[actix_rt::test]
async fn limit_and_offset() {
    let server = Server::new().await;
    let index = server.index("test");
    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
    snapshot!(code, @"200 OK");
    snapshot!(value, @r###"
    {
      "vectorStore": true,
      "metrics": false,
      "logsRoute": false
    }
    "###);

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]}))
        .await;
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    index
        .similar(json!({"id": 143, "limit": 1}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "title": "Escape Room",
                "release_year": 2019,
                "id": "522681",
                "_vectors": {
                  "manual": [
                    0.1,
                    0.6,
                    0.8
                  ]
                }
              }
            ]
            "###);
        })
        .await;

    index
        .similar(json!({"id": 143, "limit": 1, "offset": 1}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "title": "Captain Marvel",
                "release_year": 2019,
                "id": "299537",
                "_vectors": {
                  "manual": [
                    0.6,
                    0.8,
                    -0.2
                  ]
                }
              }
            ]
            "###);
        })
        .await;
}
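
The snapshot orderings above follow directly from the manual vectors in `DOCUMENTS`: the readers in this branch open arroy with the `Angular` distance (see the `arroy_readers` hunk further down), which orders candidates by the angle between vectors, i.e. by cosine similarity. A standalone sketch (not part of the diff) that reproduces the ordering the `basic` test snapshots for document `143`:

// Rank the other documents by cosine similarity to "All Quiet on the
// Western Front" ([-0.5, 0.3, 0.85]); Escape Room comes out on top.
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    dot / (na * nb)
}

fn main() {
    let target = [-0.5f32, 0.3, 0.85]; // id 143
    let candidates = [
        ("Shazam!", [0.8f32, 0.4, -0.5]),
        ("Captain Marvel", [0.6, 0.8, -0.2]),
        ("Escape Room", [0.1, 0.6, 0.8]),
        ("How to Train Your Dragon: The Hidden World", [0.7, 0.7, -0.4]),
    ];
    let mut ranked: Vec<_> =
        candidates.iter().map(|(title, v)| (*title, cosine(&target, v))).collect();
    ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
    // Prints Escape Room, Captain Marvel, the Dragon movie, then Shazam!,
    // matching the `basic` snapshot above.
    for (title, score) in ranked {
        println!("{score:+.3} {title}");
    }
}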

@@ -1,6 +1,5 @@
use std::time::Duration;

use actix_rt::time::sleep;
use meili_snap::{json_string, snapshot};
use meilisearch::option::ScheduleSnapshot;
use meilisearch::Opt;
@@ -32,6 +31,7 @@ macro_rules! verify_snapshot {
}

#[actix_rt::test]
+#[cfg_attr(target_os = "windows", ignore)]
async fn perform_snapshot() {
    let temp = tempfile::tempdir().unwrap();
    let snapshot_dir = tempfile::tempdir().unwrap();
@@ -53,11 +53,29 @@ async fn perform_snapshot() {

    index.load_test_set().await;

-    server.index("test1").create(Some("prim")).await;
+    let (task, code) = server.index("test1").create(Some("prim")).await;
+    meili_snap::snapshot!(code, @"202 Accepted");

-    index.wait_task(2).await;
+    index.wait_task(task.uid()).await;

-    sleep(Duration::from_secs(2)).await;
+    // wait for the _next task_ to process, aka the snapshot that should be enqueued at some point
+
+    println!("waited for the next task to finish");
+    let now = std::time::Instant::now();
+    let next_task = task.uid() + 1;
+    loop {
+        let (value, code) = index.get_task(next_task).await;
+        dbg!(&value);
+        if code != 404 && value["status"].as_str() == Some("succeeded") {
+            break;
+        }
+
+        if now.elapsed() > Duration::from_secs(30) {
+            panic!("The snapshot didn't schedule in 30s even though it was supposed to be scheduled every 2s: {}",
+                serde_json::to_string_pretty(&value).unwrap()
+            );
+        }
+    }

    let temp = tempfile::tempdir().unwrap();

@@ -80,9 +80,7 @@ fn main() -> anyhow::Result<()> {
/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
    let path = db_path.join("tasks");
-    let env = EnvOpenOptions::new()
-        .max_dbs(100)
-        .open(&path)
+    let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) }
        .with_context(|| format!("While trying to open {:?}", path.display()))?;

    eprintln!("Deleting tasks from the database...");

@@ -193,9 +191,7 @@ fn export_a_dump(
    FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;

    let index_scheduler_path = db_path.join("tasks");
-    let env = EnvOpenOptions::new()
-        .max_dbs(100)
-        .open(&index_scheduler_path)
+    let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;

    eprintln!("Dumping the keys...");

@@ -30,7 +30,7 @@ grenad = { version = "0.4.6", default-features = false, features = [
    "rayon",
    "tempfile",
] }
-heed = { version = "0.20.0-alpha.9", default-features = false, features = [
+heed = { version = "0.20.1", default-features = false, features = [
    "serde-json",
    "serde-bincode",
    "read-txn-no-tls",
@@ -67,9 +67,6 @@ filter-parser = { path = "../filter-parser" }
# documents words self-join
itertools = "0.11.0"

-# profiling
-puffin = "0.16.0"
-
csv = "1.3.0"
candle-core = { version = "0.4.1" }
candle-transformers = { version = "0.4.1" }
@@ -82,7 +79,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", 
] }
tiktoken-rs = "0.5.8"
liquid = "0.26.4"
-arroy = "0.2.0"
+arroy = "0.3.1"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.9.7", features = ["json"] }

@@ -48,8 +48,8 @@ fn main() -> Result<(), Box<dyn Error>> {

    let start = Instant::now();

-    let mut ctx = SearchContext::new(&index, &txn);
-    let universe = filtered_universe(&ctx, &None)?;
+    let mut ctx = SearchContext::new(&index, &txn)?;
+    let universe = filtered_universe(ctx.index, ctx.txn, &None)?;

    let docs = execute_search(
        &mut ctx,

milli/fuzz/.gitignore (new file, vendored, 3 lines)
@@ -0,0 +1,3 @@
target
corpus
artifacts

@@ -12,7 +12,10 @@ use bimap::BiHashMap;
pub use builder::DocumentsBatchBuilder;
pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
use obkv::KvReader;
-pub use primary_key::{DocumentIdExtractionError, FieldIdMapper, PrimaryKey, DEFAULT_PRIMARY_KEY};
+pub use primary_key::{
+    validate_document_id_value, DocumentIdExtractionError, FieldIdMapper, PrimaryKey,
+    DEFAULT_PRIMARY_KEY,
+};
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
use serde::{Deserialize, Serialize};

@@ -60,7 +60,7 @@ impl<'a> PrimaryKey<'a> {
            Some(document_id_bytes) => {
                let document_id = serde_json::from_slice(document_id_bytes)
                    .map_err(InternalError::SerdeJson)?;
-                match validate_document_id_value(document_id)? {
+                match validate_document_id_value(document_id) {
                    Ok(document_id) => Ok(Ok(document_id)),
                    Err(user_error) => {
                        Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
@@ -88,7 +88,7 @@ impl<'a> PrimaryKey<'a> {
        }

        match matching_documents_ids.pop() {
-            Some(document_id) => match validate_document_id_value(document_id)? {
+            Some(document_id) => match validate_document_id_value(document_id) {
                Ok(document_id) => Ok(Ok(document_id)),
                Err(user_error) => {
                    Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
@@ -159,14 +159,14 @@ fn validate_document_id(document_id: &str) -> Option<&str> {
    }
}

-pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
+pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserError> {
    match document_id {
        Value::String(string) => match validate_document_id(&string) {
-            Some(s) if s.len() == string.len() => Ok(Ok(string)),
-            Some(s) => Ok(Ok(s.to_string())),
-            None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
+            Some(s) if s.len() == string.len() => Ok(string),
+            Some(s) => Ok(s.to_string()),
+            None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
        },
-        Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
-        content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
+        Value::Number(number) if number.is_i64() => Ok(number.to_string()),
+        content => Err(UserError::InvalidDocumentId { document_id: content }),
    }
}

@@ -32,6 +32,8 @@ pub enum InternalError {
    DatabaseClosing,
    #[error("Missing {} in the {db_name} database.", key.unwrap_or("key"))]
    DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
+    #[error("Missing {key} in the fieldids weights mapping.")]
+    FieldidsWeightsMapMissingEntry { key: FieldId },
    #[error(transparent)]
    FieldIdMapMissingEntry(#[from] FieldIdMapMissingEntry),
    #[error("Missing {key} in the field id mapping.")]
@@ -46,8 +48,6 @@ pub enum InternalError {
    GrenadInvalidFormatVersion,
    #[error("Invalid merge while processing {process}")]
    IndexingMergingKeys { process: &'static str },
-    #[error("{}", HeedError::InvalidDatabaseTyping)]
-    InvalidDatabaseTyping,
    #[error(transparent)]
    RayonThreadPool(#[from] ThreadPoolBuildError),
    #[error(transparent)]
@@ -117,10 +117,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    InvalidGeoField(#[from] GeoError),
    #[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
    InvalidVectorDimensions { expected: usize, found: usize },
-    #[error("The `_vectors.{subfield}` field in the document with id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")]
-    InvalidVectorsType { document_id: Value, value: Value, subfield: String },
    #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
-    InvalidVectorsMapType { document_id: Value, value: Value },
+    InvalidVectorsMapType { document_id: String, value: Value },
    #[error("{0}")]
    InvalidFilter(String),
    #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
@@ -427,7 +425,6 @@ impl From<HeedError> for Error {
            // TODO use the encoding
            HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
            HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
-            HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
            HeedError::DatabaseClosing => InternalError(DatabaseClosing),
            HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
        }

milli/src/fieldids_weights_map.rs (new file, 48 lines)
@@ -0,0 +1,48 @@
//! The fieldids weights map is in charge of storing the link between the searchable fields and their weights.

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::{FieldId, FieldsIdsMap, Weight};

#[derive(Debug, Default, Serialize, Deserialize)]
pub struct FieldidsWeightsMap {
    map: HashMap<FieldId, Weight>,
}

impl FieldidsWeightsMap {
    /// Insert a field id -> weight into the map.
    /// If the map did not have this key present, `None` is returned.
    /// If the map did have this key present, the value is updated, and the old value is returned.
    pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option<Weight> {
        self.map.insert(fid, weight)
    }

    /// Create the map from the fields ids maps.
    /// Should only be called in the case there are NO searchable attributes.
    /// All the fields will be inserted in the order of the fields ids map with a weight of 0.
    pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self {
        FieldidsWeightsMap { map: fid_map.ids().map(|fid| (fid, 0)).collect() }
    }

    /// Removes a field id from the map, returning the associated weight previously in the map.
    pub fn remove(&mut self, fid: FieldId) -> Option<Weight> {
        self.map.remove(&fid)
    }

    /// Returns weight corresponding to the key.
    pub fn weight(&self, fid: FieldId) -> Option<Weight> {
        self.map.get(&fid).copied()
    }

    /// Returns highest weight contained in the map if any.
    pub fn max_weight(&self) -> Option<Weight> {
        self.map.values().copied().max()
    }

    /// Return an iterator visiting all field ids in arbitrary order.
    pub fn ids(&self) -> impl Iterator<Item = FieldId> + '_ {
        self.map.keys().copied()
    }
}
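
A standalone sketch of how this map behaves over its lifecycle, using std types as stand-ins for `FieldId`/`Weight` (illustrative, not milli's code):

use std::collections::HashMap;

type FieldId = u16;
type Weight = u16;

fn main() {
    // Equivalent of `from_field_id_map_without_searchable` over fields 0..3:
    // no searchable attributes configured, so everything sits at weight 0.
    let mut map: HashMap<FieldId, Weight> = (0..3).map(|fid| (fid, 0)).collect();

    // After `searchableAttributes = ["name", "realName"]`, weights follow the
    // user-defined order, as in the `swapping_searchable_attributes` test.
    map.insert(0, 0); // name
    map.insert(2, 1); // realName
    map.remove(&1); // the other field is no longer searchable

    // `max_weight` reports the highest weight in use.
    assert_eq!(map.values().copied().max(), Some(1));
}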

@@ -195,7 +195,7 @@ mod tests {
    fn merge_cbo_roaring_bitmaps() {
        let mut buffer = Vec::new();

-        let small_data = vec![
+        let small_data = [
            RoaringBitmap::from_sorted_iter(1..4).unwrap(),
            RoaringBitmap::from_sorted_iter(2..5).unwrap(),
            RoaringBitmap::from_sorted_iter(4..6).unwrap(),
@@ -209,7 +209,7 @@ mod tests {
        let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap();
        assert_eq!(bitmap, expected);

-        let medium_data = vec![
+        let medium_data = [
            RoaringBitmap::from_sorted_iter(1..4).unwrap(),
            RoaringBitmap::from_sorted_iter(2..5).unwrap(),
            RoaringBitmap::from_sorted_iter(4..8).unwrap(),

@@ -1,5 +1,6 @@
+use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::path::Path;

@@ -8,6 +9,7 @@ use heed::types::*;
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use roaring::RoaringBitmap;
use rstar::RTree;
+use serde::Serialize;
use time::OffsetDateTime;

use crate::documents::PrimaryKey;
@@ -22,11 +24,12 @@ use crate::heed_codec::{
};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
-use crate::vector::EmbeddingConfig;
+use crate::vector::{Embedding, EmbeddingConfig};
use crate::{
    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
-    FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
-    Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64,
+    FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
+    FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
+    Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
};

pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -42,6 +45,7 @@ pub mod main_key {
    pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
    pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
    pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
+    pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map";
    pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
    pub const GEO_RTREE_KEY: &str = "geo-rtree";
    pub const PRIMARY_KEY_KEY: &str = "primary-key";
@@ -181,7 +185,7 @@ impl Index {

        options.max_dbs(25);

-        let env = options.open(path)?;
+        let env = unsafe { options.open(path) }?;
        let mut wtxn = env.write_txn()?;
        let main = env.database_options().name(MAIN).create(&mut wtxn)?;
        let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
@@ -291,6 +295,11 @@ impl Index {
        self.env.read_txn()
    }

+    /// Create a static read transaction to be able to read the index without keeping a reference to it.
+    pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static>> {
+        self.env.clone().static_read_txn()
+    }
+
    /// Returns the canonicalized path where the heed `Env` of this `Index` lives.
    pub fn path(&self) -> &Path {
        self.env.path()
@@ -316,6 +325,87 @@ impl Index {
        self.env.info().map_size
    }

+    pub fn advanced_stats(&self, rtxn: &heed::RoTxn) -> Result<AdvancedStats> {
+        use db_name::*;
+
+        let mut dbs = BTreeMap::new();
+        dbs.insert(WORD_DOCIDS, advanced_database_stats(rtxn, self.word_docids)?);
+        dbs.insert(
+            WORD_PAIR_PROXIMITY_DOCIDS,
+            advanced_database_stats(rtxn, self.word_pair_proximity_docids)?,
+        );
+        dbs.insert(WORD_PREFIX_DOCIDS, advanced_database_stats(rtxn, self.word_prefix_docids)?);
+        dbs.insert(WORD_FIELD_ID_DOCIDS, advanced_database_stats(rtxn, self.word_fid_docids)?);
+        dbs.insert(WORD_POSITION_DOCIDS, advanced_database_stats(rtxn, self.word_position_docids)?);
+        dbs.insert(DOCUMENTS, advanced_database_stats_no_bitmap(rtxn, self.documents)?);
+
+        fn advanced_database_stats<KC>(
+            rtxn: &heed::RoTxn,
+            db: Database<KC, CboRoaringBitmapCodec>,
+        ) -> Result<AdvancedDatabaseStats> {
+            let db = db.remap_key_type::<Bytes>().lazily_decode_data();
+
+            let mut entries_count = 0;
+            let mut total_bitmap_size = 0;
+            let mut total_bitmap_len = 0;
+            let mut total_key_size = 0;
+
+            for result in db.iter(rtxn)? {
+                let (bytes_key, lazy_value) = result?;
+                entries_count += 1;
+                total_bitmap_size += lazy_value.remap::<Bytes>().decode().unwrap().len();
+                let bitmap = lazy_value.decode().map_err(heed::Error::Decoding)?;
+                total_bitmap_len += bitmap.len();
+                total_key_size += bytes_key.len();
+            }
+
+            Ok(AdvancedDatabaseStats {
+                entries_count,
+                average_bitmap_len: Some(total_bitmap_len as f64 / entries_count as f64),
+                median_bitmap_len: None,
+                average_value_size: Some(total_bitmap_size as f64 / entries_count as f64),
+                median_value_size: None,
+                average_key_size: Some(total_key_size as f64 / entries_count as f64),
+                median_key_size: None,
+            })
+        }
+
+        fn advanced_database_stats_no_bitmap<KC, DC>(
+            rtxn: &heed::RoTxn,
+            db: Database<KC, DC>,
+        ) -> Result<AdvancedDatabaseStats> {
+            let db = db.remap_types::<Bytes, Bytes>();
+
+            let mut entries_count = 0;
+            let mut total_value_size = 0;
+            let mut total_key_size = 0;
+
+            for result in db.iter(rtxn)? {
+                let (bytes_key, bytes_value) = result?;
+                entries_count += 1;
+                total_value_size += bytes_value.len();
+                total_key_size += bytes_key.len();
+            }
+
+            Ok(AdvancedDatabaseStats {
+                entries_count,
+                average_bitmap_len: None,
+                median_bitmap_len: None,
+                average_value_size: Some(total_value_size as f64 / entries_count as f64),
+                median_value_size: None,
+                average_key_size: Some(total_key_size as f64 / entries_count as f64),
+                median_key_size: None,
+            })
+        }
+
+        Ok(AdvancedStats {
+            map_size: self.map_size(),
+            non_free_pages_size: self.on_disk_size()?,
+            on_disk_size: self.on_disk_size()?,
+            databases: dbs,
+        })
+    }
+
    pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
        self.env.copy_to_file(path, option).map_err(Into::into)
    }
@@ -414,6 +504,65 @@ impl Index {
            .unwrap_or_default())
    }

+    /* fieldids weights map */
+    // This maps the fields ids to their weights.
+    // Their weight is defined by the ordering of the searchable attributes.
+
+    /// Writes the fieldids weights map which associates the field ids to their weights
+    pub(crate) fn put_fieldids_weights_map(
+        &self,
+        wtxn: &mut RwTxn,
+        map: &FieldidsWeightsMap,
+    ) -> heed::Result<()> {
+        self.main.remap_types::<Str, SerdeJson<_>>().put(
+            wtxn,
+            main_key::FIELDIDS_WEIGHTS_MAP_KEY,
+            map,
+        )
+    }
+
+    /// Get the fieldids weights map which associates the field ids to their weights
+    pub fn fieldids_weights_map(&self, rtxn: &RoTxn) -> heed::Result<FieldidsWeightsMap> {
+        self.main
+            .remap_types::<Str, SerdeJson<_>>()
+            .get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)?
+            .map(Ok)
+            .unwrap_or_else(|| {
+                Ok(FieldidsWeightsMap::from_field_id_map_without_searchable(
+                    &self.fields_ids_map(rtxn)?,
+                ))
+            })
+    }
+
+    /// Delete the fieldids weights map
+    pub fn delete_fieldids_weights_map(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
+        self.main.remap_key_type::<Str>().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)
+    }
+
+    pub fn searchable_fields_and_weights<'a>(
+        &self,
+        rtxn: &'a RoTxn,
+    ) -> Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
+        let fid_map = self.fields_ids_map(rtxn)?;
+        let weight_map = self.fieldids_weights_map(rtxn)?;
+        let searchable = self.searchable_fields(rtxn)?;
+
+        searchable
+            .into_iter()
+            .map(|field| -> Result<_> {
+                let fid = fid_map.id(&field).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
+                    field_name: field.to_string(),
+                    process: "searchable_fields_and_weights",
+                })?;
+                let weight = weight_map
+                    .weight(fid)
+                    .ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
+
+                Ok((field, fid, weight))
+            })
+            .collect()
+    }
+
    /* geo rtree */

    /// Writes the provided `rtree` which associates coordinates to documents ids.
@@ -578,33 +727,42 @@ impl Index {
        wtxn: &mut RwTxn,
        user_fields: &[&str],
        fields_ids_map: &FieldsIdsMap,
-    ) -> heed::Result<()> {
+    ) -> Result<()> {
        // We can write the user defined searchable fields as-is.
        self.put_user_defined_searchable_fields(wtxn, user_fields)?;

+        let mut weights = FieldidsWeightsMap::default();
+
        // Now we generate the real searchable fields:
        // 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
        // 2. Iterate over the user defined searchable fields.
        // 3. If a user defined field is a subset of a field defined in the fields_ids_map
-        // (ie doggo.name is a subset of doggo) then we push it at the end of the fields.
-        let mut real_fields = user_fields.to_vec();
+        // (ie doggo.name is a subset of doggo) right after doggo and with the same weight.
+        let mut real_fields = Vec::new();

-        for field_from_map in fields_ids_map.names() {
-            for user_field in user_fields {
+        for (id, field_from_map) in fields_ids_map.iter() {
+            for (weight, user_field) in user_fields.iter().enumerate() {
                if crate::is_faceted_by(field_from_map, user_field)
-                    && !user_fields.contains(&field_from_map)
                    && !real_fields.contains(&field_from_map)
                {
                    real_fields.push(field_from_map);
+
+                    let weight: u16 =
+                        weight.try_into().map_err(|_| UserError::AttributeLimitReached)?;
+                    weights.insert(id, weight);
                }
            }
        }

-        self.put_searchable_fields(wtxn, &real_fields)
+        self.put_searchable_fields(wtxn, &real_fields)?;
+        self.put_fieldids_weights_map(wtxn, &weights)?;
+        Ok(())
    }

    pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        let did_delete_searchable = self.delete_searchable_fields(wtxn)?;
        let did_delete_user_defined = self.delete_user_defined_searchable_fields(wtxn)?;
+        self.delete_fieldids_weights_map(wtxn)?;
        Ok(did_delete_searchable || did_delete_user_defined)
    }

@@ -623,28 +781,31 @@ impl Index {
    }

    /// Returns the searchable fields, those are the fields that are indexed,
    /// if the searchable fields aren't there it means that **all** the fields are indexed.
-    pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
+    pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Vec<Cow<'t, str>>> {
        self.main
            .remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
-            .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
+            .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)?
+            .map(|fields| Ok(fields.into_iter().map(Cow::Borrowed).collect()))
+            .unwrap_or_else(|| {
+                Ok(self
+                    .fields_ids_map(rtxn)?
+                    .names()
+                    .map(|field| Cow::Owned(field.to_string()))
+                    .collect())
+            })
    }

    /// Identical to `searchable_fields`, but returns the ids instead.
-    pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> {
-        match self.searchable_fields(rtxn)? {
-            Some(fields) => {
-                let fields_ids_map = self.fields_ids_map(rtxn)?;
-                let mut fields_ids = Vec::new();
-                for name in fields {
-                    if let Some(field_id) = fields_ids_map.id(name) {
-                        fields_ids.push(field_id);
-                    }
-                }
-                Ok(Some(fields_ids))
-            }
-            None => Ok(None),
-        }
+    pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Vec<FieldId>> {
+        let fields = self.searchable_fields(rtxn)?;
+        let fields_ids_map = self.fields_ids_map(rtxn)?;
+        let mut fields_ids = Vec::new();
+        for name in fields {
+            if let Some(field_id) = fields_ids_map.id(&name) {
+                fields_ids.push(field_id);
+            }
+        }
+        Ok(fields_ids)
    }

    /// Writes the searchable fields, when this list is specified, only these are indexed.
@@ -1516,6 +1677,22 @@ impl Index {
            .unwrap_or_default())
    }

+    pub fn arroy_readers<'a>(
+        &'a self,
+        rtxn: &'a RoTxn<'a>,
+        embedder_id: u8,
+    ) -> impl Iterator<Item = Result<arroy::Reader<arroy::distances::Angular>>> + 'a {
+        crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
+            arroy::Reader::open(rtxn, k, self.vector_arroy)
+                .map(Some)
+                .or_else(|e| match e {
+                    arroy::Error::MissingMetadata => Ok(None),
+                    e => Err(e.into()),
+                })
+                .transpose()
+        })
+    }
+
    pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
        self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
    }
@@ -1527,6 +1704,74 @@ impl Index {
    pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
        self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCH_CUTOFF)
    }

+    pub fn embeddings(
+        &self,
+        rtxn: &RoTxn<'_>,
+        docid: DocumentId,
+    ) -> Result<BTreeMap<String, Vec<Embedding>>> {
+        let mut res = BTreeMap::new();
+        for row in self.embedder_category_id.iter(rtxn)? {
+            let (embedder_name, embedder_id) = row?;
+            let embedder_id = (embedder_id as u16) << 8;
+            let mut embeddings = Vec::new();
+            'vectors: for i in 0..=u8::MAX {
+                let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
+                    .map(Some)
+                    .or_else(|e| match e {
+                        arroy::Error::MissingMetadata => Ok(None),
+                        e => Err(e),
+                    })
+                    .transpose();
+
+                let Some(reader) = reader else {
+                    break 'vectors;
+                };
+
+                let embedding = reader?.item_vector(rtxn, docid)?;
+                if let Some(embedding) = embedding {
+                    embeddings.push(embedding)
+                } else {
+                    break 'vectors;
+                }
+            }
+
+            if !embeddings.is_empty() {
+                res.insert(embedder_name.to_owned(), embeddings);
+            }
+        }
+        Ok(res)
+    }
}
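
Both `arroy_readers` and `embeddings` rely on the same key layout: the `u8` embedder id occupies the high byte of the `u16` arroy index, leaving the low byte free for up to 256 stores per embedder, and readers stop at the first missing store. A standalone sketch of that layout (mirroring what the `arroy_db_range_for_embedder` range used above provides; the function name here is a stand-in):

// Each embedder owns a contiguous range of 256 u16 keys.
fn store_keys_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
    let base = (embedder_id as u16) << 8;
    (0..=u8::MAX).map(move |i| base | (i as u16))
}

fn main() {
    let keys: Vec<u16> = store_keys_for_embedder(1).collect();
    assert_eq!(keys.first(), Some(&0x0100));
    assert_eq!(keys.last(), Some(&0x01FF));
    assert_eq!(keys.len(), 256);
}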

+#[derive(Clone, Debug, Serialize)]
+pub struct AdvancedStats {
+    /// Size of the data memory map.
+    map_size: usize,
+    /// Returns the size used by all the databases in the environment without the free pages.
+    non_free_pages_size: u64,
+    /// The size of the data file on disk.
+    on_disk_size: u64,
+    /// Databases advanced stats.
+    databases: BTreeMap<&'static str, AdvancedDatabaseStats>,
+}
+
+#[derive(Clone, Debug, Serialize)]
+pub struct AdvancedDatabaseStats {
+    /// The number of entries in this database.
+    entries_count: usize,
+    /// The average number of entries in the bitmaps of this database.
+    average_bitmap_len: Option<f64>,
+    /// The median number of entries in the bitmaps of this database.
+    median_bitmap_len: Option<f64>,
+    /// The average size of values of this database.
+    average_value_size: Option<f64>,
+    /// The median size of values of this database.
+    median_value_size: Option<f64>,
+    /// The average size of keys of this database.
+    average_key_size: Option<f64>,
+    /// The median size of keys of this database.
+    median_key_size: Option<f64>,
+}
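
The struct reserves `median_*` fields, but the single-pass iteration in `advanced_stats` only accumulates the totals needed for the averages, so the medians above stay `None`. A standalone sketch of one way they could be filled in by collecting the per-entry sizes first (an assumption, not part of the diff):

// Median over collected per-entry sizes; needs all samples, unlike the
// running totals that are enough for an average.
fn median(mut samples: Vec<usize>) -> Option<f64> {
    if samples.is_empty() {
        return None;
    }
    samples.sort_unstable();
    let mid = samples.len() / 2;
    Some(if samples.len() % 2 == 0 {
        (samples[mid - 1] + samples[mid]) as f64 / 2.0
    } else {
        samples[mid] as f64
    })
}

fn main() {
    assert_eq!(median(vec![3, 1, 2]), Some(2.0));
    assert_eq!(median(vec![1, 2, 3, 4]), Some(2.5));
    assert_eq!(median(Vec::new()), None);
}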
|
||||
|
||||
#[cfg(test)]
|
||||
@ -1710,10 +1955,14 @@ pub(crate) mod tests {
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, field_distribution, 1);
|
||||
db_snap!(index, field_distribution, @r###"
|
||||
age 1 |
|
||||
id 2 |
|
||||
name 2 |
|
||||
"###);
|
||||
|
||||
db_snap!(index, word_docids,
|
||||
@r###"
|
||||
@r###"
|
||||
1 [0, ]
|
||||
2 [1, ]
|
||||
20 [1, ]
|
||||
@ -1722,18 +1971,6 @@ pub(crate) mod tests {
|
||||
"###
|
||||
);
|
||||
|
||||
db_snap!(index, field_distribution);
|
||||
|
||||
db_snap!(index, field_distribution,
|
||||
@r###"
|
||||
age 1 |
|
||||
id 2 |
|
||||
name 2 |
|
||||
"###
|
||||
);
|
||||
|
||||
// snapshot_index!(&index, "1", include: "^field_distribution$");
|
||||
|
||||
// we add all the documents a second time. we are supposed to get the same
|
||||
// field_distribution in the end
|
||||
index
|
||||
@ -1820,7 +2057,7 @@ pub(crate) mod tests {
|
||||
// ensure we get the right real searchable fields + user defined searchable fields
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
|
||||
let real = index.searchable_fields(&rtxn).unwrap();
|
||||
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
|
||||
|
||||
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
||||
@ -1840,7 +2077,7 @@ pub(crate) mod tests {
|
||||
// ensure we get the right real searchable fields + user defined searchable fields
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
|
||||
let real = index.searchable_fields(&rtxn).unwrap();
|
||||
assert_eq!(real, &["doggo", "name"]);
|
||||
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
||||
assert_eq!(user_defined, &["doggo", "name"]);
|
||||
@ -1856,7 +2093,7 @@ pub(crate) mod tests {
|
||||
// ensure we get the right real searchable fields + user defined searchable fields
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
|
||||
let real = index.searchable_fields(&rtxn).unwrap();
|
||||
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
|
||||
|
||||
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
||||
@ -2395,6 +2632,14 @@ pub(crate) mod tests {
|
||||
11 0
|
||||
4 1
|
||||
"###);
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 primary_key |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["primary_key"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
"###);
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
@ -2410,6 +2655,16 @@ pub(crate) mod tests {
|
||||
11 0
|
||||
4 1
|
||||
"###);
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 primary_key |
|
||||
1 a |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
1 0 |
|
||||
"###);
|
||||
|
||||
index.delete_documents(Default::default());
|
||||
|
||||
@ -2420,6 +2675,16 @@ pub(crate) mod tests {
|
||||
11 0
|
||||
4 1
|
||||
"###);
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 primary_key |
|
||||
1 a |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
1 0 |
|
||||
"###);
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
@ -2435,6 +2700,16 @@ pub(crate) mod tests {
|
||||
11 0
|
||||
4 1
|
||||
"###);
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 primary_key |
|
||||
1 a |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
1 0 |
|
||||
"###);
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let search = Search::new(&rtxn, &index);
|
||||
@@ -2520,4 +2795,104 @@ pub(crate) mod tests {

 db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
 }
+
+#[test]
+fn swapping_searchable_attributes() {
+// See https://github.com/meilisearch/meilisearch/issues/4484
+
+let index = TempIndex::new();
+
+index
+.update_settings(|settings| {
+settings.set_searchable_fields(vec![S("name")]);
+settings.set_filterable_fields(HashSet::from([S("age")]));
+})
+.unwrap();
+
+index
+.add_documents(documents!({ "id": 1, "name": "Many", "age": 28, "realName": "Maxime" }))
+.unwrap();
+db_snap!(index, fields_ids_map, @r###"
+0 name |
+1 id |
+2 age |
+3 realName |
+"###);
+db_snap!(index, searchable_fields, @r###"["name"]"###);
+db_snap!(index, fieldids_weights_map, @r###"
+fid weight
+0 0 |
+"###);
+
+index
+.update_settings(|settings| {
+settings.set_searchable_fields(vec![S("name"), S("realName")]);
+settings.set_filterable_fields(HashSet::from([S("age")]));
+})
+.unwrap();
+
+// The order of the field id map shouldn't change
+db_snap!(index, fields_ids_map, @r###"
+0 name |
+1 id |
+2 age |
+3 realName |
+"###);
+db_snap!(index, searchable_fields, @r###"["name", "realName"]"###);
+db_snap!(index, fieldids_weights_map, @r###"
+fid weight
+0 0 |
+3 1 |
+"###);
+}
+
+#[test]
+fn attribute_weights_after_swapping_searchable_attributes() {
+// See https://github.com/meilisearch/meilisearch/issues/4484
+
+let index = TempIndex::new();
+
+index
+.update_settings(|settings| {
+settings.set_searchable_fields(vec![S("name"), S("beverage")]);
+})
+.unwrap();
+
+index
+.add_documents(documents!([
+{ "id": 0, "name": "kefir", "beverage": "water" },
+{ "id": 1, "name": "tamo", "beverage": "kefir" }
+]))
+.unwrap();
+
+let rtxn = index.read_txn().unwrap();
+let mut search = index.search(&rtxn);
+let results = search.query("kefir").execute().unwrap();
+
+// We should find kefir the dog first
+insta::assert_debug_snapshot!(results.documents_ids, @r###"
+[
+0,
+1,
+]
+"###);
+
+index
+.update_settings(|settings| {
+settings.set_searchable_fields(vec![S("beverage"), S("name")]);
+})
+.unwrap();
+
+let rtxn = index.read_txn().unwrap();
+let mut search = index.search(&rtxn);
+let results = search.query("kefir").execute().unwrap();
+
+// We should find tamo first
+insta::assert_debug_snapshot!(results.documents_ids, @r###"
+[
+1,
+0,
+]
+"###);
+}
 }
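The `fieldids_weights_map` snapshots above are the point of the fix for issue #4484: field ids stay stable across settings updates while weights are reassigned from the order of `searchableAttributes` (weight 0 marks the most important attribute). A sketch of reading the map back after the first test's swap, reusing accessors that appear later in this diff:

    let rtxn = index.read_txn().unwrap();
    let weights = index.fieldids_weights_map(&rtxn).unwrap();
    assert_eq!(weights.weight(0), Some(0)); // "name" keeps fid 0 and still ranks first
    assert_eq!(weights.weight(3), Some(1)); // "realName" keeps fid 3 but is now ranked second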
@@ -28,6 +28,7 @@ pub mod vector;
 #[cfg(test)]
 #[macro_use]
 pub mod snapshot_tests;
+mod fieldids_weights_map;

 use std::collections::{BTreeMap, HashMap};
 use std::convert::{TryFrom, TryInto};
@@ -52,6 +53,7 @@ pub use self::error::{
 Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
 };
 pub use self::external_documents_ids::ExternalDocumentsIds;
+pub use self::fieldids_weights_map::FieldidsWeightsMap;
 pub use self::fields_ids_map::FieldsIdsMap;
 pub use self::heed_codec::{
 BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
@@ -61,6 +63,7 @@ pub use self::heed_codec::{
 };
 pub use self::index::Index;
 pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
+pub use self::search::similar::Similar;
 pub use self::search::{
 FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
 Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
@@ -77,6 +80,7 @@ pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
 pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
 pub type FieldDistribution = BTreeMap<String, u64>;
 pub type FieldId = u16;
+pub type Weight = u16;
 pub type Object = serde_json::Map<String, serde_json::Value>;
 pub type Position = u32;
 pub type RelativePosition = u16;
@@ -351,43 +355,13 @@ pub fn is_faceted(field: &str, faceted_fields: impl IntoIterator<Item = impl AsR
 /// assert!(!is_faceted_by("animaux.chien", "animaux.chie"));
 /// ```
 pub fn is_faceted_by(field: &str, facet: &str) -> bool {
-field.starts_with(facet)
-&& field[facet.len()..].chars().next().map(|c| c == '.').unwrap_or(true)
+field.starts_with(facet) && field[facet.len()..].chars().next().map_or(true, |c| c == '.')
 }

 pub fn normalize_facet(original: &str) -> String {
 CompatibilityDecompositionNormalizer.normalize_str(original.trim()).to_lowercase()
 }

-/// Represents either a vector or an array of multiple vectors.
-#[derive(serde::Serialize, serde::Deserialize, Debug)]
-#[serde(transparent)]
-pub struct VectorOrArrayOfVectors {
-#[serde(with = "either::serde_untagged_optional")]
-inner: Option<either::Either<Vec<f32>, Vec<Vec<f32>>>>,
-}
-
-impl VectorOrArrayOfVectors {
-pub fn into_array_of_vectors(self) -> Option<Vec<Vec<f32>>> {
-match self.inner? {
-either::Either::Left(vector) => Some(vec![vector]),
-either::Either::Right(vectors) => Some(vectors),
-}
-}
-}
-
-/// Normalize a vector by dividing the dimensions by the length of it.
-pub fn normalize_vector(mut vector: Vec<f32>) -> Vec<f32> {
-let squared: f32 = vector.iter().map(|x| x * x).sum();
-let length = squared.sqrt();
-if length <= f32::EPSILON {
-vector
-} else {
-vector.iter_mut().for_each(|x| *x /= length);
-vector
-}
-}
-
 #[cfg(test)]
 mod tests {
 use serde_json::json;
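The `map_or(true, |c| c == '.')` spelling is behavior-preserving: an exact prefix match (no next character) still counts as faceted, and only a `.` may follow a proper prefix. A standalone check using only std, with the cases taken from the doc-comment above:

    fn is_faceted_by(field: &str, facet: &str) -> bool {
        field.starts_with(facet) && field[facet.len()..].chars().next().map_or(true, |c| c == '.')
    }

    assert!(is_faceted_by("animaux.chien", "animaux"));
    assert!(is_faceted_by("animaux.chien", "animaux.chien"));
    assert!(!is_faceted_by("animaux.chien", "animaux.chie"));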
@@ -24,6 +24,7 @@ pub mod facet;
 mod fst_utils;
 pub mod hybrid;
 pub mod new;
+pub mod similar;

 #[derive(Debug, Clone)]
 pub struct SemanticSearch {
@@ -147,21 +148,21 @@ impl<'a> Search<'a> {

 pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
 if has_vector_search {
-let ctx = SearchContext::new(self.index, self.rtxn);
-filtered_universe(&ctx, &self.filter)
+let ctx = SearchContext::new(self.index, self.rtxn)?;
+filtered_universe(ctx.index, ctx.txn, &self.filter)
 } else {
 Ok(self.execute()?.candidates)
 }
 }

 pub fn execute(&self) -> Result<SearchResult> {
-let mut ctx = SearchContext::new(self.index, self.rtxn);
+let mut ctx = SearchContext::new(self.index, self.rtxn)?;

 if let Some(searchable_attributes) = self.searchable_attributes {
-ctx.searchable_attributes(searchable_attributes)?;
+ctx.attributes_to_search_on(searchable_attributes)?;
 }

-let universe = filtered_universe(&ctx, &self.filter)?;
+let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
 let PartialSearchResult {
 located_query_terms,
 candidates,
@@ -276,6 +277,8 @@ pub enum TermsMatchingStrategy {
 Last,
 // all words are mandatory
 All,
+// remove more frequent word first
+Frequency,
 }

 impl Default for TermsMatchingStrategy {
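`SearchContext::new` becomes fallible because it now loads the searchable fields and their weights up front (see the `search/new/mod.rs` hunks further down), so both call sites propagate the error with `?`. A sketch of the new construction, mirroring the tests in this diff; `attributes` stands in for whatever restriction the query carries and is not a value defined here:

    let rtxn = index.read_txn()?;
    let mut ctx = SearchContext::new(&index, &rtxn)?; // may fail while reading field weights
    ctx.attributes_to_search_on(&attributes)?; // renamed from `searchable_attributes`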
@@ -101,7 +101,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(

 let mut ranking_rule_universes: Vec<RoaringBitmap> =
 vec![RoaringBitmap::default(); ranking_rules_len];
-ranking_rule_universes[0] = universe.clone();
+ranking_rule_universes[0].clone_from(universe);
 let mut cur_ranking_rule_index = 0;

 /// Finish iterating over the current ranking rule, yielding
@@ -232,7 +232,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
 }

 cur_ranking_rule_index += 1;
-ranking_rule_universes[cur_ranking_rule_index] = next_bucket.candidates.clone();
+ranking_rule_universes[cur_ranking_rule_index].clone_from(&next_bucket.candidates);
 logger.start_iteration_ranking_rule(
 cur_ranking_rule_index,
 ranking_rules[cur_ranking_rule_index].as_ref(),
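`clone_from` overwrites the existing bitmap in place instead of building a fresh clone and dropping the old allocation, which matters in this hot loop. A minimal illustration with plain `RoaringBitmap`s:

    use roaring::RoaringBitmap;

    let universe: RoaringBitmap = (0..1_000).collect();
    let mut slot = RoaringBitmap::default();
    slot.clone_from(&universe); // reuses `slot`'s storage where possible
    assert_eq!(slot, universe);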
@@ -163,7 +163,7 @@ impl<'ctx> SearchContext<'ctx> {
 Some(restricted_fids) => {
 let interned = self.word_interner.get(word).as_str();
 let keys: Vec<_> =
-restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
+restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();

 DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
 self.txn,
@@ -192,7 +192,7 @@ impl<'ctx> SearchContext<'ctx> {
 Some(restricted_fids) => {
 let interned = self.word_interner.get(word).as_str();
 let keys: Vec<_> =
-restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
+restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();

 DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
 self.txn,
@@ -242,7 +242,7 @@ impl<'ctx> SearchContext<'ctx> {
 Some(restricted_fids) => {
 let interned = self.word_interner.get(prefix).as_str();
 let keys: Vec<_> =
-restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
+restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();

 DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
 self.txn,
@@ -271,7 +271,7 @@ impl<'ctx> SearchContext<'ctx> {
 Some(restricted_fids) => {
 let interned = self.word_interner.get(prefix).as_str();
 let keys: Vec<_> =
-restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
+restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();

 DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
 self.txn,
@@ -315,11 +315,7 @@ impl<'ctx> SearchContext<'ctx> {
 .map_err(heed::Error::Decoding)?
 } else {
 // Compute the distance at the attribute level and store it in the cache.
-let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
-fids
-} else {
-self.index.fields_ids_map(self.txn)?.ids().collect()
-};
+let fids = self.index.searchable_fields_ids(self.txn)?;
 let mut docids = RoaringBitmap::new();
 for fid in fids {
 // for each field, intersect left word bitmap and right word bitmap,
@@ -408,11 +404,7 @@ impl<'ctx> SearchContext<'ctx> {
 let prefix_docids = match proximity_precision {
 ProximityPrecision::ByAttribute => {
 // Compute the distance at the attribute level and store it in the cache.
-let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
-fids
-} else {
-self.index.fields_ids_map(self.txn)?.ids().collect()
-};
+let fids = self.index.searchable_fields_ids(self.txn)?;
 let mut prefix_docids = RoaringBitmap::new();
 // for each field, intersect left word bitmap and right word bitmap,
 // then merge the result in a global bitmap before storing it in the cache.
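`RestrictedFids` now stores `(FieldId, Weight)` pairs (see its definition further down), but the word/fid docid databases are still keyed by field id alone, so every cache lookup above projects the weight away. A tiny illustration of the projection on plain tuples:

    let tolerant: Vec<(u16, u16)> = vec![(1, 0), (3, 1)]; // hypothetical (fid, weight) pairs
    let interned = "kefir";
    let keys: Vec<_> = tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
    assert_eq!(keys, vec![("kefir", 1), ("kefir", 3)]);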
@@ -184,13 +184,7 @@ impl State {
 return Ok(State::Empty(query_graph.clone()));
 }

-let searchable_fields_ids = {
-if let Some(fids) = ctx.index.searchable_fields_ids(ctx.txn)? {
-fids
-} else {
-ctx.index.fields_ids_map(ctx.txn)?.ids().collect()
-}
-};
+let searchable_fields_ids = ctx.index.searchable_fields_ids(ctx.txn)?;

 let mut candidates_per_attribute = Vec::with_capacity(searchable_fields_ids.len());
 // then check that there exists at least one attribute that has all of the terms
@@ -164,6 +164,21 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
 }
 costs
 }
+TermsMatchingStrategy::Frequency => {
+let removal_order =
+query_graph.removal_order_for_terms_matching_strategy_frequency(ctx)?;
+let mut forbidden_nodes =
+SmallBitmap::for_interned_values_in(&query_graph.nodes);
+let mut costs = query_graph.nodes.map(|_| None);
+// FIXME: this works because only words uses termsmatchingstrategy at the moment.
+for ns in removal_order {
+for n in ns.iter() {
+*costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
+}
+forbidden_nodes.union(&ns);
+}
+costs
+}
 TermsMatchingStrategy::All => query_graph.nodes.map(|_| None),
 }
 } else {
@@ -258,7 +258,7 @@ pub(crate) mod tests {
 fn matching_words() {
 let temp_index = temp_index_with_documents();
 let rtxn = temp_index.read_txn().unwrap();
-let mut ctx = SearchContext::new(&temp_index, &rtxn);
+let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
 let mut builder = TokenizerBuilder::default();
 let tokenizer = builder.build();
 let tokens = tokenizer.tokenize("split this world");
@@ -506,8 +506,8 @@ mod tests {

 impl<'a> MatcherBuilder<'a> {
 fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
-let mut ctx = SearchContext::new(index, rtxn);
-let universe = filtered_universe(&ctx, &None).unwrap();
+let mut ctx = SearchContext::new(index, rtxn).unwrap();
+let universe = filtered_universe(ctx.index, ctx.txn, &None).unwrap();
 let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
 &mut ctx,
 Some(query),
@@ -49,13 +49,12 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
 use self::graph_based_ranking_rule::Words;
 use self::interner::Interned;
 use self::vector_sort::VectorSort;
-use crate::error::FieldIdMapMissingEntry;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::search::new::distinct::apply_distinct_rule;
 use crate::vector::Embedder;
 use crate::{
 AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
-UserError,
+UserError, Weight,
 };

 /// A structure used throughout the execution of a search query.
@@ -71,8 +70,21 @@ pub struct SearchContext<'ctx> {
 }

 impl<'ctx> SearchContext<'ctx> {
-pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self {
-Self {
+pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Result<Self> {
+let searchable_fids = index.searchable_fields_and_weights(txn)?;
+let exact_attributes_ids = index.exact_attributes_ids(txn)?;
+
+let mut exact = Vec::new();
+let mut tolerant = Vec::new();
+for (_name, fid, weight) in searchable_fids {
+if exact_attributes_ids.contains(&fid) {
+exact.push((fid, weight));
+} else {
+tolerant.push((fid, weight));
+}
+}
+
+Ok(Self {
 index,
 txn,
 db_cache: <_>::default(),
@@ -81,42 +93,39 @@ impl<'ctx> SearchContext<'ctx> {
 term_interner: <_>::default(),
 phrase_docids: <_>::default(),
 restricted_fids: None,
-}
+})
 }

-pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
-let fids_map = self.index.fields_ids_map(self.txn)?;
-let searchable_names = self.index.searchable_fields(self.txn)?;
+pub fn attributes_to_search_on(
+&mut self,
+attributes_to_search_on: &'ctx [String],
+) -> Result<()> {
+let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?;
+let searchable_fields_weights = self.index.searchable_fields_and_weights(self.txn)?;
 let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;

+let mut wildcard = false;
+
 let mut restricted_fids = RestrictedFids::default();
-let mut contains_wildcard = false;
-for field_name in searchable_attributes {
+for field_name in attributes_to_search_on {
 if field_name == "*" {
-contains_wildcard = true;
+wildcard = true;
 // we cannot early exit as we want to returns error in case of unknown fields
 continue;
 }
-let searchable_contains_name =
-searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
-let fid = match (fids_map.id(field_name), searchable_contains_name) {
+let searchable_weight =
+searchable_fields_weights.iter().find(|(name, _, _)| name == field_name);
+let (fid, weight) = match searchable_weight {
 // The Field id exist and the field is searchable
-(Some(fid), Some(true)) | (Some(fid), None) => fid,
-// The field is searchable but the Field id doesn't exist => Internal Error
-(None, Some(true)) => {
-return Err(FieldIdMapMissingEntry::FieldName {
-field_name: field_name.to_string(),
-process: "search",
-}
-.into())
-}
-// The field is not searchable, but the searchableAttributes are set to * => ignore field
-(None, None) => continue,
+Some((_name, fid, weight)) => (*fid, *weight),
+// The field is not searchable but the user didn't define any searchable attributes
+None if user_defined_searchable.is_none() => continue,
 // The field is not searchable => User error
-(_fid, Some(false)) => {
-let (valid_fields, hidden_fields) = match searchable_names {
-Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?,
-None => self.index.remove_hidden_fields(self.txn, fids_map.names())?,
-};
+None => {
+let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
+self.txn,
+searchable_fields_weights.iter().map(|(name, _, _)| name),
+)?;

 let field = field_name.to_string();
 return Err(UserError::InvalidSearchableAttribute {
@@ -129,13 +138,17 @@ impl<'ctx> SearchContext<'ctx> {
 };

 if exact_attributes_ids.contains(&fid) {
-restricted_fids.exact.push(fid);
+restricted_fids.exact.push((fid, weight));
 } else {
-restricted_fids.tolerant.push(fid);
+restricted_fids.tolerant.push((fid, weight));
 };
 }

-self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
+if wildcard {
+self.restricted_fids = None;
+} else {
+self.restricted_fids = Some(restricted_fids);
+}

 Ok(())
 }
@@ -158,13 +171,13 @@ impl Word {

 #[derive(Debug, Clone, Default)]
 pub struct RestrictedFids {
-pub tolerant: Vec<FieldId>,
-pub exact: Vec<FieldId>,
+pub tolerant: Vec<(FieldId, Weight)>,
+pub exact: Vec<(FieldId, Weight)>,
 }

 impl RestrictedFids {
 pub fn contains(&self, fid: &FieldId) -> bool {
-self.tolerant.contains(fid) || self.exact.contains(fid)
+self.tolerant.iter().any(|(id, _)| id == fid) || self.exact.iter().any(|(id, _)| id == fid)
 }
 }

@@ -184,6 +197,11 @@ fn resolve_maximally_reduced_query_graph(
 .iter()
 .flat_map(|x| x.iter())
 .collect(),
+TermsMatchingStrategy::Frequency => query_graph
+.removal_order_for_terms_matching_strategy_frequency(ctx)?
+.iter()
+.flat_map(|x| x.iter())
+.collect(),
 TermsMatchingStrategy::All => vec![],
 };
 graph.remove_nodes_keep_edges(&nodes_to_remove);
@@ -530,11 +548,15 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
 Ok(())
 }

-pub fn filtered_universe(ctx: &SearchContext, filters: &Option<Filter>) -> Result<RoaringBitmap> {
+pub fn filtered_universe(
+index: &Index,
+txn: &RoTxn<'_>,
+filters: &Option<Filter>,
+) -> Result<RoaringBitmap> {
 Ok(if let Some(filters) = filters {
-filters.evaluate(ctx.txn, ctx.index)?
+filters.evaluate(txn, index)?
 } else {
-ctx.index.documents_ids(ctx.txn)?
+index.documents_ids(txn)?
 })
 }

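Taking the index and transaction directly means a universe can now be computed without building a whole `SearchContext`; the new `Similar` search further down relies on exactly that. A sketch of the standalone call under the signature just shown:

    let universe = filtered_universe(&index, &rtxn, &None)?; // all documents when no filter is set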
@@ -1,8 +1,9 @@
-use std::cmp::Ordering;
+use std::cmp::{Ordering, Reverse};
 use std::collections::BTreeMap;
 use std::hash::{Hash, Hasher};

 use fxhash::{FxHashMap, FxHasher};
+use roaring::RoaringBitmap;

 use super::interner::{FixedSizeInterner, Interned};
 use super::query_term::{
@@ -11,6 +12,7 @@ use super::query_term::{
 use super::small_bitmap::SmallBitmap;
 use super::SearchContext;
 use crate::search::new::interner::Interner;
+use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
 use crate::Result;

 /// A node of the [`QueryGraph`].
@@ -290,6 +292,49 @@ impl QueryGraph {
 }
 }

+pub fn removal_order_for_terms_matching_strategy_frequency(
+&self,
+ctx: &mut SearchContext,
+) -> Result<Vec<SmallBitmap<QueryNode>>> {
+// lookup frequency for each term
+let mut term_with_frequency: Vec<(u8, u64)> = {
+let mut term_docids: BTreeMap<u8, RoaringBitmap> = Default::default();
+for (_, node) in self.nodes.iter() {
+match &node.data {
+QueryNodeData::Term(t) => {
+let docids = compute_query_term_subset_docids(ctx, &t.term_subset)?;
+for id in t.term_ids.clone() {
+term_docids
+.entry(id)
+.and_modify(|curr| *curr |= &docids)
+.or_insert_with(|| docids.clone());
+}
+}
+QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue,
+}
+}
+term_docids
+.into_iter()
+.map(|(idx, docids)| match docids.len() {
+0 => (idx, u64::max_value()),
+frequency => (idx, frequency),
+})
+.collect()
+};
+term_with_frequency.sort_by_key(|(_, frequency)| Reverse(*frequency));
+let mut term_weight = BTreeMap::new();
+let mut weight: u16 = 1;
+let mut peekable = term_with_frequency.into_iter().peekable();
+while let Some((idx, frequency)) = peekable.next() {
+term_weight.insert(idx, weight);
+if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
+weight += 1;
+}
+}
+let cost_of_term_idx = move |term_idx: u8| *term_weight.get(&term_idx).unwrap();
+Ok(self.removal_order_for_terms_matching_strategy(ctx, cost_of_term_idx))
+}
+
 pub fn removal_order_for_terms_matching_strategy_last(
 &self,
 ctx: &SearchContext,
@@ -315,10 +360,19 @@ impl QueryGraph {
 if first_term_idx >= last_term_idx {
 return vec![];
 }

 let cost_of_term_idx = |term_idx: u8| {
 let rank = 1 + last_term_idx - term_idx;
 rank as u16
 };
+self.removal_order_for_terms_matching_strategy(ctx, cost_of_term_idx)
+}
+
+pub fn removal_order_for_terms_matching_strategy(
+&self,
+ctx: &SearchContext,
+order: impl Fn(u8) -> u16,
+) -> Vec<SmallBitmap<QueryNode>> {
 let mut nodes_to_remove = BTreeMap::<u16, SmallBitmap<QueryNode>>::new();
 let mut at_least_one_mandatory_term = false;
 for (node_id, node) in self.nodes.iter() {
@@ -329,7 +383,7 @@ impl QueryGraph {
 }
 let mut cost = 0;
 for id in t.term_ids.clone() {
-cost = std::cmp::max(cost, cost_of_term_idx(id));
+cost = std::cmp::max(cost, order(id));
 }
 nodes_to_remove
 .entry(cost)
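In `removal_order_for_terms_matching_strategy_frequency`, terms are sorted by descending document frequency and then bucketed so that equally frequent terms share a weight; the most frequent terms end up cheapest to remove first. A standalone replay of the bucketing loop on sample data:

    use std::cmp::Reverse;
    use std::collections::BTreeMap;

    let mut term_with_frequency: Vec<(u8, u64)> = vec![(0, 10), (1, 50), (2, 50), (3, 2)];
    term_with_frequency.sort_by_key(|(_, frequency)| Reverse(*frequency));
    let mut term_weight = BTreeMap::new();
    let mut weight: u16 = 1;
    let mut peekable = term_with_frequency.into_iter().peekable();
    while let Some((idx, frequency)) = peekable.next() {
        term_weight.insert(idx, weight);
        if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
            weight += 1;
        }
    }
    // terms 1 and 2 (frequency 50) share weight 1, term 0 gets 2, term 3 gets 3
    assert_eq!(term_weight, BTreeMap::from([(1, 1), (2, 1), (0, 2), (3, 3)]));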
@@ -366,7 +366,7 @@ mod tests {
 let tokens = tokenizer.tokenize(".");
 let index = temp_index_with_documents();
 let rtxn = index.read_txn()?;
-let mut ctx = SearchContext::new(&index, &rtxn);
+let mut ctx = SearchContext::new(&index, &rtxn)?;
 // panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
 let ExtractedTokens { query_terms, .. } =
 located_query_terms_from_tokens(&mut ctx, tokens, None)?;
@@ -7,12 +7,12 @@ use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
 use crate::search::new::SearchContext;
-use crate::Result;
+use crate::{FieldId, InternalError, Result};

 #[derive(Clone, PartialEq, Eq, Hash)]
 pub struct FidCondition {
 term: LocatedQueryTermSubset,
-fid: u16,
+fid: Option<FieldId>,
 }

 pub enum FidGraph {}
@@ -26,13 +26,15 @@ impl RankingRuleGraphTrait for FidGraph {
 universe: &RoaringBitmap,
 ) -> Result<ComputedCondition> {
 let FidCondition { term, .. } = condition;
-// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
-let mut docids = compute_query_term_subset_docids_within_field_id(
-ctx,
-&term.term_subset,
-condition.fid,
-)?;
-docids &= universe;

+let docids = if let Some(fid) = condition.fid {
+// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
+let docids =
+compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?;
+docids & universe
+} else {
+RoaringBitmap::new()
+};

 Ok(ComputedCondition {
 docids,
@@ -68,34 +70,29 @@ impl RankingRuleGraphTrait for FidGraph {
 all_fields.extend(fields);
 }

+let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?;
+
 let mut edges = vec![];
 for fid in all_fields.iter().copied() {
+let weight = weights_map
+.weight(fid)
+.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
 edges.push((
-fid as u32 * term.term_ids.len() as u32,
-conditions_interner.insert(FidCondition { term: term.clone(), fid }),
+weight as u32 * term.term_ids.len() as u32,
+conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
 ));
 }

 // always lookup the max_fid if we don't already and add an artificial condition for max scoring
-let max_fid: Option<u16> = {
-if let Some(max_fid) = ctx
-.index
-.searchable_fields_ids(ctx.txn)?
-.map(|field_ids| field_ids.into_iter().max())
-{
-max_fid
-} else {
-ctx.index.fields_ids_map(ctx.txn)?.ids().max()
-}
-};
+let max_weight: Option<u16> = weights_map.max_weight();

-if let Some(max_fid) = max_fid {
-if !all_fields.contains(&max_fid) {
+if let Some(max_weight) = max_weight {
+if !all_fields.contains(&max_weight) {
 edges.push((
-max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
+max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
 conditions_interner.insert(FidCondition {
 term: term.clone(), // TODO remove this ugly clone
-fid: max_fid,
+fid: None,
 }),
 ));
 }
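Edge costs in the `fid` ranking rule now come from the attribute's weight instead of its raw field id, so reordering `searchableAttributes` reorders scoring without renumbering fields, and the artificial max-scoring condition is keyed by `max_weight` with `fid: None` rather than a fabricated field id. A sketch of the lookup, reusing the accessors above (`term_count` is a stand-in for `term.term_ids.len()`):

    let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?;
    let weight = weights_map
        .weight(fid)
        .ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
    let cost = weight as u32 * term_count as u32; // lower weight => better-ranked attribute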
@@ -1,5 +1,5 @@
 use crate::index::tests::TempIndex;
-use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
+use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};

 fn create_index() -> TempIndex {
 let index = TempIndex::new();
@@ -131,6 +131,19 @@ fn test_attribute_fid_simple() {
 #[test]
 fn test_attribute_fid_ngrams() {
 let index = create_index();
+db_snap!(index, fields_ids_map, @r###"
+0 id |
+1 title |
+2 description |
+3 plot |
+"###);
+db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###);
+db_snap!(index, fieldids_weights_map, @r###"
+fid weight
+1 0 |
+2 1 |
+3 2 |
+"###);
+
 let txn = index.read_txn().unwrap();

@@ -0,0 +1,244 @@
+---
+source: milli/src/search/new/tests/attribute_fid.rs
+expression: "format!(\"{document_ids_scores:#?}\")"
+---
+[
+(
+2,
+[
+Fid(
+Rank {
+rank: 19,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 91,
+max_rank: 91,
+},
+),
+],
+),
+(
+6,
+[
+Fid(
+Rank {
+rank: 15,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 81,
+max_rank: 91,
+},
+),
+],
+),
+(
+5,
+[
+Fid(
+Rank {
+rank: 14,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 79,
+max_rank: 91,
+},
+),
+],
+),
+(
+4,
+[
+Fid(
+Rank {
+rank: 13,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 77,
+max_rank: 91,
+},
+),
+],
+),
+(
+3,
+[
+Fid(
+Rank {
+rank: 12,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 83,
+max_rank: 91,
+},
+),
+],
+),
+(
+9,
+[
+Fid(
+Rank {
+rank: 11,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 75,
+max_rank: 91,
+},
+),
+],
+),
+(
+8,
+[
+Fid(
+Rank {
+rank: 10,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 79,
+max_rank: 91,
+},
+),
+],
+),
+(
+7,
+[
+Fid(
+Rank {
+rank: 10,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 73,
+max_rank: 91,
+},
+),
+],
+),
+(
+11,
+[
+Fid(
+Rank {
+rank: 7,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 77,
+max_rank: 91,
+},
+),
+],
+),
+(
+10,
+[
+Fid(
+Rank {
+rank: 6,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 81,
+max_rank: 91,
+},
+),
+],
+),
+(
+13,
+[
+Fid(
+Rank {
+rank: 6,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 81,
+max_rank: 91,
+},
+),
+],
+),
+(
+12,
+[
+Fid(
+Rank {
+rank: 6,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 78,
+max_rank: 91,
+},
+),
+],
+),
+(
+14,
+[
+Fid(
+Rank {
+rank: 5,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 75,
+max_rank: 91,
+},
+),
+],
+),
+(
+0,
+[
+Fid(
+Rank {
+rank: 1,
+max_rank: 19,
+},
+),
+Position(
+Rank {
+rank: 91,
+max_rank: 91,
+},
+),
+],
+),
+]
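Condensed, the snapshot pairs each document id with its fid and position ranks (out of a max of 19 and 91 respectively):

doc  fid rank  position rank
2    19        91
6    15        81
5    14        79
4    13        77
3    12        83
9    11        75
8    10        79
7    10        73
11    7        77
10    6        81
13    6        81
12    6        78
14    5        75
0     1        91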
@@ -49,19 +49,8 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
 ctx: &mut SearchContext<'_>,
 vector_candidates: &RoaringBitmap,
 ) -> Result<()> {
-let writer_index = (self.embedder_index as u16) << 8;
-let readers: std::result::Result<Vec<_>, _> = (0..=u8::MAX)
-.map_while(|k| {
-arroy::Reader::open(ctx.txn, writer_index | (k as u16), ctx.index.vector_arroy)
-.map(Some)
-.or_else(|e| match e {
-arroy::Error::MissingMetadata => Ok(None),
-e => Err(e),
-})
-.transpose()
-})
-.collect();
-
+let readers: std::result::Result<Vec<_>, _> =
+ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect();
 let readers = readers?;

 let target = &self.target;
milli/src/search/similar.rs (new file, 111 lines)
@@ -0,0 +1,111 @@
+use std::sync::Arc;
+
+use ordered_float::OrderedFloat;
+use roaring::RoaringBitmap;
+
+use crate::score_details::{self, ScoreDetails};
+use crate::vector::Embedder;
+use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult};
+
+pub struct Similar<'a> {
+id: DocumentId,
+// this should be linked to the String in the query
+filter: Option<Filter<'a>>,
+offset: usize,
+limit: usize,
+rtxn: &'a heed::RoTxn<'a>,
+index: &'a Index,
+embedder_name: String,
+embedder: Arc<Embedder>,
+}
+
+impl<'a> Similar<'a> {
+pub fn new(
+id: DocumentId,
+offset: usize,
+limit: usize,
+index: &'a Index,
+rtxn: &'a heed::RoTxn<'a>,
+embedder_name: String,
+embedder: Arc<Embedder>,
+) -> Self {
+Self { id, filter: None, offset, limit, rtxn, index, embedder_name, embedder }
+}
+
+pub fn filter(&mut self, filter: Filter<'a>) -> &mut Self {
+self.filter = Some(filter);
+self
+}
+
+pub fn execute(&self) -> Result<SearchResult> {
+let universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
+
+let embedder_index =
+self.index
+.embedder_category_id
+.get(self.rtxn, &self.embedder_name)?
+.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;
+
+let readers: std::result::Result<Vec<_>, _> =
+self.index.arroy_readers(self.rtxn, embedder_index).collect();
+
+let readers = readers?;
+
+let mut results = Vec::new();
+
+for reader in readers.iter() {
+let nns_by_item = reader.nns_by_item(
+self.rtxn,
+self.id,
+self.limit + self.offset + 1,
+None,
+Some(&universe),
+)?;
+if let Some(mut nns_by_item) = nns_by_item {
+results.append(&mut nns_by_item);
+} else {
+break;
+}
+}
+
+results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
+
+let mut documents_ids = Vec::with_capacity(self.limit);
+let mut document_scores = Vec::with_capacity(self.limit);
+// list of documents we've already seen, so that we don't return the same document multiple times.
+// initialized to the target document, that we never want to return.
+let mut documents_seen = RoaringBitmap::new();
+documents_seen.insert(self.id);
+
+for (docid, distance) in results
+.into_iter()
+// skip documents we've already seen & mark that we saw the current document
+.filter(|(docid, _)| documents_seen.insert(*docid))
+.skip(self.offset)
+// take **after** filter and skip so that we get exactly limit elements if available
+.take(self.limit)
+{
+documents_ids.push(docid);
+
+let score = 1.0 - distance;
+let score = self
+.embedder
+.distribution()
+.map(|distribution| distribution.shift(score))
+.unwrap_or(score);
+
+let score = ScoreDetails::Vector(score_details::Vector { similarity: Some(score) });
+
+document_scores.push(vec![score]);
+}
+
+Ok(SearchResult {
+matching_words: Default::default(),
+candidates: universe,
+documents_ids,
+document_scores,
+degraded: false,
+used_negative_operator: false,
+})
+}
+}
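A sketch of driving the new similar-documents API end to end; the embedder name and handle are assumed to come from the index's embedding configuration, which this file does not show how to obtain:

    use std::sync::Arc;

    use milli::vector::Embedder;
    use milli::{Index, Result, SearchResult, Similar};

    fn similar_to(index: &Index, doc: u32, name: String, embedder: Arc<Embedder>) -> Result<SearchResult> {
        let rtxn = index.read_txn()?;
        // offset 0, limit 20; `execute` already filters the target document out.
        Similar::new(doc, 0, 20, index, &rtxn, name, embedder).execute()
    }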
@@ -308,6 +308,25 @@ pub fn snap_fields_ids_map(index: &Index) -> String {
 }
 snap
 }
+pub fn snap_fieldids_weights_map(index: &Index) -> String {
+let rtxn = index.read_txn().unwrap();
+let weights_map = index.fieldids_weights_map(&rtxn).unwrap();
+
+let mut snap = String::new();
+writeln!(&mut snap, "fid weight").unwrap();
+let mut field_ids: Vec<_> = weights_map.ids().collect();
+field_ids.sort();
+for field_id in field_ids {
+let weight = weights_map.weight(field_id).unwrap();
+writeln!(&mut snap, "{field_id:<3} {weight:<3} |").unwrap();
+}
+snap
+}
+pub fn snap_searchable_fields(index: &Index) -> String {
+let rtxn = index.read_txn().unwrap();
+let searchable_fields = index.searchable_fields(&rtxn).unwrap();
+format!("{searchable_fields:?}")
+}
 pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
 let rtxn = index.read_txn().unwrap();
 let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
@@ -469,6 +488,12 @@ macro_rules! full_snap_of_db {
 ($index:ident, fields_ids_map) => {{
 $crate::snapshot_tests::snap_fields_ids_map(&$index)
 }};
+($index:ident, fieldids_weights_map) => {{
+$crate::snapshot_tests::snap_fieldids_weights_map(&$index)
+}};
+($index:ident, searchable_fields) => {{
+$crate::snapshot_tests::snap_searchable_fields(&$index)
+}};
 ($index:ident, geo_faceted_documents_ids) => {{
 $crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index)
 }};
@@ -21,8 +21,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
 name = "clear_documents"
 )]
 pub fn execute(self) -> Result<u64> {
-puffin::profile_function!();
-
 self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
 let Index {
 env: _env,
@@ -379,7 +379,7 @@ pub(crate) mod test_helpers {
 let mut options = heed::EnvOpenOptions::new();
 let options = options.map_size(4096 * 4 * 1000 * 100);
 let tempdir = tempfile::TempDir::new().unwrap();
-let env = options.open(tempdir.path()).unwrap();
+let env = unsafe { options.open(tempdir.path()) }.unwrap();
 let mut wtxn = env.write_txn().unwrap();
 let content = env.create_database(&mut wtxn, None).unwrap();
 wtxn.commit().unwrap();
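The `unsafe` block comes from the heed upgrade: opening an LMDB environment is now marked unsafe, presumably because misuse (such as opening the same environment file twice) can trigger undefined behavior, and the test helper can vouch for safety here since it owns a fresh temporary directory.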
@@ -29,8 +29,6 @@ pub fn enrich_documents_batch<R: Read + Seek>(
 autogenerate_docids: bool,
 reader: DocumentsBatchReader<R>,
 ) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
-puffin::profile_function!();
-
 let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();

 let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
@@ -29,8 +29,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
 settings_diff: &InnerIndexSettingsDiff,
 max_positions_per_attributes: Option<u32>,
 ) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
-puffin::profile_function!();
-
 let max_positions_per_attributes = max_positions_per_attributes
 .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
 let max_memory = indexer.max_memory_by_thread();
@@ -186,7 +184,7 @@ fn searchable_fields_changed(
 ) -> bool {
 let searchable_fields = &settings_diff.new.searchable_fields_ids;
 for (field_id, field_bytes) in obkv.iter() {
-if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
+if searchable_fields.contains(&field_id) {
 let del_add = KvReaderDelAdd::new(field_bytes);
 match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
 // if both fields are None, check the next field.
@@ -298,7 +296,7 @@ fn lang_safe_tokens_from_document<'a>(
 /// Extract words mapped with their positions of a document.
 fn tokens_from_document<'a>(
 obkv: &KvReader<FieldId>,
-searchable_fields: &Option<Vec<FieldId>>,
+searchable_fields: &[FieldId],
 tokenizer: &Tokenizer,
 max_positions_per_attributes: u32,
 del_add: DelAdd,
@@ -309,7 +307,7 @@ fn tokens_from_document<'a>(
 let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
 for (field_id, field_bytes) in obkv.iter() {
 // if field is searchable.
-if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
+if searchable_fields.as_ref().contains(&field_id) {
 // extract deletion or addition only.
 if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
 // parse json.
@@ -23,8 +23,6 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
 indexer: GrenadParameters,
 _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-puffin::profile_function!();
-
 let max_memory = indexer.max_memory_by_thread();

 let mut facet_number_docids_sorter = create_sorter(
@@ -28,8 +28,6 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
 indexer: GrenadParameters,
 _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
-puffin::profile_function!();
-
 let max_memory = indexer.max_memory_by_thread();
 let options = NormalizerOption { lossy: true, ..Default::default() };

@@ -45,10 +45,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
 obkv_documents: grenad::Reader<R>,
 indexer: GrenadParameters,
 settings_diff: &InnerIndexSettingsDiff,
-geo_fields_ids: Option<(FieldId, FieldId)>,
 ) -> Result<ExtractedFacetValues> {
-puffin::profile_function!();
-
 let max_memory = indexer.max_memory_by_thread();

 let mut fid_docid_facet_numbers_sorter = create_sorter(
@@ -127,12 +124,18 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
 add_exists.insert(document);
 }

-let geo_support =
-geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+let del_geo_support = settings_diff
+.old
+.geo_fields_ids
+.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+let add_geo_support = settings_diff
+.new
+.geo_fields_ids
+.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
 let del_filterable_values =
-del_value.map(|value| extract_facet_values(&value, geo_support));
+del_value.map(|value| extract_facet_values(&value, del_geo_support));
 let add_filterable_values =
-add_value.map(|value| extract_facet_values(&value, geo_support));
+add_value.map(|value| extract_facet_values(&value, add_geo_support));

 // Those closures are just here to simplify things a bit.
 let mut insert_numbers_diff = |del_numbers, add_numbers| {
@@ -26,8 +26,6 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
 indexer: GrenadParameters,
 _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-puffin::profile_function!();
-
 let max_memory = indexer.max_memory_by_thread();

 let mut fid_word_count_docids_sorter = create_sorter(
@@ -8,6 +8,7 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
 use crate::error::GeoError;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::extract_finite_float_from_value;
+use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 use crate::{FieldId, InternalError, Result};

 /// Extracts the geographical coordinates contained in each document under the `_geo` field.
@@ -18,10 +19,8 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
 obkv_documents: grenad::Reader<R>,
 indexer: GrenadParameters,
 primary_key_id: FieldId,
-(lat_fid, lng_fid): (FieldId, FieldId),
+settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-puffin::profile_function!();
-
 let mut writer = create_writer(
 indexer.chunk_compression_type,
 indexer.chunk_compression_level,
@@ -40,47 +39,27 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
 serde_json::from_slice(document_id).unwrap()
 };

-// first we get the two fields
-match (obkv.get(lat_fid), obkv.get(lng_fid)) {
-(Some(lat), Some(lng)) => {
-let deladd_lat_obkv = KvReaderDelAdd::new(lat);
-let deladd_lng_obkv = KvReaderDelAdd::new(lng);
-
-// then we extract the values
-let del_lat_lng = deladd_lat_obkv
-.get(DelAdd::Deletion)
-.zip(deladd_lng_obkv.get(DelAdd::Deletion))
-.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
-.transpose()?;
-let add_lat_lng = deladd_lat_obkv
-.get(DelAdd::Addition)
-.zip(deladd_lng_obkv.get(DelAdd::Addition))
-.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
-.transpose()?;
-
-if del_lat_lng != add_lat_lng {
-let mut obkv = KvWriterDelAdd::memory();
-if let Some([lat, lng]) = del_lat_lng {
-#[allow(clippy::drop_non_drop)]
-let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
-obkv.insert(DelAdd::Deletion, bytes)?;
-}
-if let Some([lat, lng]) = add_lat_lng {
-#[allow(clippy::drop_non_drop)]
-let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
-obkv.insert(DelAdd::Addition, bytes)?;
-}
-let bytes = obkv.into_inner()?;
-writer.insert(docid_bytes, bytes)?;
-}
-}
-(None, Some(_)) => {
-return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
-}
-(Some(_), None) => {
-return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
-}
-(None, None) => (),
+// extract old version
+let del_lat_lng =
+extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
+// extract new version
+let add_lat_lng =
+extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
+
+if del_lat_lng != add_lat_lng {
+let mut obkv = KvWriterDelAdd::memory();
+if let Some([lat, lng]) = del_lat_lng {
+#[allow(clippy::drop_non_drop)]
+let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+obkv.insert(DelAdd::Deletion, bytes)?;
+}
+if let Some([lat, lng]) = add_lat_lng {
+#[allow(clippy::drop_non_drop)]
+let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+obkv.insert(DelAdd::Addition, bytes)?;
+}
+let bytes = obkv.into_inner()?;
+writer.insert(docid_bytes, bytes)?;
+}
 }

@@ -88,16 +67,37 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
 }

 /// Extract the finite floats lat and lng from two bytes slices.
-fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
-let lat = extract_finite_float_from_value(
-serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
-)
-.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
-
-let lng = extract_finite_float_from_value(
-serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
-)
-.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
-
-Ok([lat, lng])
+fn extract_lat_lng(
+document: &obkv::KvReader<FieldId>,
+settings: &InnerIndexSettings,
+deladd: DelAdd,
+document_id: impl Fn() -> Value,
+) -> Result<Option<[f64; 2]>> {
+match settings.geo_fields_ids {
+Some((lat_fid, lng_fid)) => {
+let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
+let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
+let (lat, lng) = match (lat, lng) {
+(Some(lat), Some(lng)) => (lat, lng),
+(Some(_), None) => {
+return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
+}
+(None, Some(_)) => {
+return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
+}
+(None, None) => return Ok(None),
+};
+let lat = extract_finite_float_from_value(
+serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
+)
+.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
+
+let lng = extract_finite_float_from_value(
+serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
+)
+.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
+Ok(Some([lat, lng]))
+}
+None => Ok(None),
+}
+}
@ -10,16 +10,16 @@ use bytemuck::cast_slice;
|
||||
use grenad::Writer;
|
||||
use itertools::EitherOrBoth;
|
||||
use ordered_float::OrderedFloat;
|
||||
use serde_json::{from_slice, Value};
|
||||
use serde_json::Value;
|
||||
|
||||
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
||||
use crate::error::UserError;
|
||||
use crate::prompt::Prompt;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::index_documents::helpers::try_split_at;
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::parsed_vectors::{ParsedVectorsDiff, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::vector::Embedder;
|
||||
use crate::{DocumentId, InternalError, Result, ThreadPoolNoAbort, VectorOrArrayOfVectors};
|
||||
use crate::{DocumentId, Result, ThreadPoolNoAbort};
|
||||
|
||||
/// The length of the elements that are always in the buffer when inserting new values.
|
||||
const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
|
||||
@ -31,6 +31,10 @@ pub struct ExtractedVectorPoints {
|
||||
pub remove_vectors: grenad::Reader<BufReader<File>>,
|
||||
// docid -> prompt
|
||||
pub prompts: grenad::Reader<BufReader<File>>,
|
||||
|
||||
// embedder
|
||||
pub embedder_name: String,
|
||||
pub embedder: Arc<Embedder>,
|
||||
}
|
||||
|
||||
enum VectorStateDelta {
|
||||
@ -65,6 +69,19 @@ impl VectorStateDelta {
|
||||
}
|
||||
}
|
||||
|
||||
struct EmbedderVectorExtractor {
|
||||
embedder_name: String,
|
||||
embedder: Arc<Embedder>,
|
||||
prompt: Arc<Prompt>,
|
||||
|
||||
// (docid, _index) -> KvWriterDelAdd -> Vector
|
||||
manual_vectors_writer: Writer<BufWriter<File>>,
|
||||
// (docid) -> (prompt)
|
||||
prompts_writer: Writer<BufWriter<File>>,
|
||||
// (docid) -> ()
|
||||
remove_vectors_writer: Writer<BufWriter<File>>,
|
||||
}
|
||||
|
||||
/// Extracts the embedding vector contained in each document under the `_vectors` field.
|
||||
///
|
||||
/// Returns the generated grenad reader containing the docid as key associated to the Vec<f32>
|
||||
@ -73,34 +90,52 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
settings_diff: &InnerIndexSettingsDiff,
|
||||
prompt: &Prompt,
|
||||
embedder_name: &str,
|
||||
) -> Result<ExtractedVectorPoints> {
|
||||
puffin::profile_function!();
|
||||
) -> Result<Vec<ExtractedVectorPoints>> {
|
||||
let reindex_vectors = settings_diff.reindex_vectors();
|
||||
|
||||
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
|
||||
let new_fields_ids_map = &settings_diff.new.fields_ids_map;
|
||||
// the vector field id may have changed
|
||||
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
|
||||
// filter the old vector fid if the settings has been changed forcing reindexing.
|
||||
let old_vectors_fid = old_vectors_fid.filter(|_| !reindex_vectors);
|
||||
|
||||
// (docid, _index) -> KvWriterDelAdd -> Vector
|
||||
let mut manual_vectors_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
let new_vectors_fid = new_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
|
||||
|
||||
// (docid) -> (prompt)
|
||||
let mut prompts_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
let mut extractors = Vec::new();
|
||||
for (embedder_name, (embedder, prompt)) in
|
||||
settings_diff.new.embedding_configs.clone().into_iter()
|
||||
{
|
||||
// (docid, _index) -> KvWriterDelAdd -> Vector
|
||||
let manual_vectors_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
|
||||
// (docid) -> ()
|
||||
let mut remove_vectors_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
// (docid) -> (prompt)
|
||||
let prompts_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
|
||||
// (docid) -> ()
|
||||
let remove_vectors_writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
tempfile::tempfile()?,
|
||||
);
|
||||
|
||||
extractors.push(EmbedderVectorExtractor {
|
||||
embedder_name,
|
||||
embedder,
|
||||
prompt,
|
||||
manual_vectors_writer,
|
||||
prompts_writer,
|
||||
remove_vectors_writer,
|
||||
});
|
||||
}
|
||||
|
||||
let mut key_buffer = Vec::new();
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
@ -114,152 +149,138 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
key_buffer.clear();
|
||||
key_buffer.extend_from_slice(docid_bytes);
|
||||
|
||||
// since we only needs the primary key when we throw an error we create this getter to
|
||||
// since we only need the primary key when we throw an error we create this getter to
|
||||
// lazily get it when needed
|
||||
let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() };
|
||||
|
||||
// the vector field id may have changed
|
||||
let old_vectors_fid = old_fields_ids_map.id("_vectors");
|
||||
// filter the old vector fid if the settings has been changed forcing reindexing.
|
||||
let old_vectors_fid = old_vectors_fid.filter(|_| !settings_diff.reindex_vectors());
|
||||
let mut parsed_vectors = ParsedVectorsDiff::new(obkv, old_vectors_fid, new_vectors_fid)
|
||||
.map_err(|error| error.to_crate_error(document_id().to_string()))?;
|
||||
|
||||
let new_vectors_fid = new_fields_ids_map.id("_vectors");
|
||||
let vectors_field = {
|
||||
let del = old_vectors_fid
|
||||
.and_then(|vectors_fid| obkv.get(vectors_fid))
|
||||
.map(KvReaderDelAdd::new)
|
||||
.map(|obkv| to_vector_map(obkv, DelAdd::Deletion, &document_id))
|
||||
.transpose()?
|
||||
.flatten();
|
||||
let add = new_vectors_fid
|
||||
.and_then(|vectors_fid| obkv.get(vectors_fid))
|
||||
.map(KvReaderDelAdd::new)
|
||||
.map(|obkv| to_vector_map(obkv, DelAdd::Addition, &document_id))
|
||||
.transpose()?
|
||||
.flatten();
|
||||
(del, add)
|
||||
};
|
||||
        for EmbedderVectorExtractor {
            embedder_name,
            embedder: _,
            prompt,
            manual_vectors_writer,
            prompts_writer,
            remove_vectors_writer,
        } in extractors.iter_mut()
        {
            let delta = match parsed_vectors.remove(embedder_name) {
                (Some(old), Some(new)) => {
                    // no autogeneration
                    let del_vectors = old.into_array_of_vectors();
                    let add_vectors = new.into_array_of_vectors();

        let (del_map, add_map) = vectors_field;

        let del_value = del_map.and_then(|mut map| map.remove(embedder_name));
        let add_value = add_map.and_then(|mut map| map.remove(embedder_name));

        let delta = match (del_value, add_value) {
            (Some(old), Some(new)) => {
                // no autogeneration
                let del_vectors = extract_vectors(old, document_id, embedder_name)?;
                let add_vectors = extract_vectors(new, document_id, embedder_name)?;

                if add_vectors.len() > usize::from(u8::MAX) {
                    return Err(crate::Error::UserError(crate::UserError::TooManyVectors(
                        document_id().to_string(),
                        add_vectors.len(),
                    )));
                }

                VectorStateDelta::ManualDelta(del_vectors, add_vectors)
            }
            (Some(_old), None) => {
                // Do we keep this document?
                let document_is_kept = obkv
                    .iter()
                    .map(|(_, deladd)| KvReaderDelAdd::new(deladd))
                    .any(|deladd| deladd.get(DelAdd::Addition).is_some());
                if document_is_kept {
                    // becomes autogenerated
                    VectorStateDelta::NowGenerated(prompt.render(
                        obkv,
                        DelAdd::Addition,
                        new_fields_ids_map,
                    )?)
                } else {
                    VectorStateDelta::NowRemoved
                }
            }
            (None, Some(new)) => {
                // was possibly autogenerated, remove all vectors for that document
                let add_vectors = extract_vectors(new, document_id, embedder_name)?;
                if add_vectors.len() > usize::from(u8::MAX) {
                    return Err(crate::Error::UserError(crate::UserError::TooManyVectors(
                        document_id().to_string(),
                        add_vectors.len(),
                    )));
                }

                VectorStateDelta::WasGeneratedNowManual(add_vectors)
            }
            (None, None) => {
                // Do we keep this document?
                let document_is_kept = obkv
                    .iter()
                    .map(|(_, deladd)| KvReaderDelAdd::new(deladd))
                    .any(|deladd| deladd.get(DelAdd::Addition).is_some());

                if document_is_kept {
                    // Don't give up if the old prompt was failing
                    let old_prompt = Some(prompt)
                        // TODO: this filter works because we erase the vec database when an embedding setting changes.
                        // When the vector pipeline is optimized, this should be removed.
                        .filter(|_| !settings_diff.reindex_vectors())
                        .map(|p| {
                            p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default()
                        });
                    let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
                    if old_prompt.as_ref() != Some(&new_prompt) {
                        let old_prompt = old_prompt.unwrap_or_default();
                        tracing::trace!(
                            "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
                        );
                        VectorStateDelta::NowGenerated(new_prompt)
                    } else {
                        tracing::trace!("⏭️ Prompt unmodified, skipping");
                        VectorStateDelta::NoChange
                    if add_vectors.len() > usize::from(u8::MAX) {
                        return Err(crate::Error::UserError(crate::UserError::TooManyVectors(
                            document_id().to_string(),
                            add_vectors.len(),
                        )));
                    }
                } else {
                    VectorStateDelta::NowRemoved
                }
            }
        };

        // and we finally push the unique vectors into the writer
        push_vectors_diff(
            &mut remove_vectors_writer,
            &mut prompts_writer,
            &mut manual_vectors_writer,
            &mut key_buffer,
            delta,
            settings_diff,
        )?;
                    VectorStateDelta::ManualDelta(del_vectors, add_vectors)
                }
                (Some(_old), None) => {
                    // Do we keep this document?
                    let document_is_kept = obkv
                        .iter()
                        .map(|(_, deladd)| KvReaderDelAdd::new(deladd))
                        .any(|deladd| deladd.get(DelAdd::Addition).is_some());
                    if document_is_kept {
                        // becomes autogenerated
                        VectorStateDelta::NowGenerated(prompt.render(
                            obkv,
                            DelAdd::Addition,
                            new_fields_ids_map,
                        )?)
                    } else {
                        VectorStateDelta::NowRemoved
                    }
                }
                (None, Some(new)) => {
                    // was possibly autogenerated, remove all vectors for that document
                    let add_vectors = new.into_array_of_vectors();
                    if add_vectors.len() > usize::from(u8::MAX) {
                        return Err(crate::Error::UserError(crate::UserError::TooManyVectors(
                            document_id().to_string(),
                            add_vectors.len(),
                        )));
                    }

                    VectorStateDelta::WasGeneratedNowManual(add_vectors)
                }
                (None, None) => {
                    // Do we keep this document?
                    let document_is_kept = obkv
                        .iter()
                        .map(|(_, deladd)| KvReaderDelAdd::new(deladd))
                        .any(|deladd| deladd.get(DelAdd::Addition).is_some());

                    if document_is_kept {
                        // Don't give up if the old prompt was failing
                        let old_prompt = Some(&prompt)
                            // TODO: this filter works because we erase the vec database when an embedding setting changes.
                            // When the vector pipeline is optimized, this should be removed.
                            .filter(|_| !settings_diff.reindex_vectors())
                            .map(|p| {
                                p.render(obkv, DelAdd::Deletion, old_fields_ids_map)
                                    .unwrap_or_default()
                            });
                        let new_prompt =
                            prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
                        if old_prompt.as_ref() != Some(&new_prompt) {
                            let old_prompt = old_prompt.unwrap_or_default();
                            tracing::trace!(
                                "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
                            );
                            VectorStateDelta::NowGenerated(new_prompt)
                        } else {
                            tracing::trace!("⏭️ Prompt unmodified, skipping");
                            VectorStateDelta::NoChange
                        }
                    } else {
                        VectorStateDelta::NowRemoved
                    }
                }
            };

            // and we finally push the unique vectors into the writer
            push_vectors_diff(
                remove_vectors_writer,
                prompts_writer,
                manual_vectors_writer,
                &mut key_buffer,
                delta,
                reindex_vectors,
            )?;
        }
    }

    Ok(ExtractedVectorPoints {
        // docid, _index -> KvWriterDelAdd -> Vector
        manual_vectors: writer_into_reader(manual_vectors_writer)?,
        // docid -> ()
        remove_vectors: writer_into_reader(remove_vectors_writer)?,
        // docid -> prompt
        prompts: writer_into_reader(prompts_writer)?,
    })
}
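Both the (Some(_old), None) and (None, None) arms above hinge on the same document_is_kept test: a document survives the update when at least one of its fields still carries an Addition side in the del/add obkv. A hedged sketch of that check, modeling the obkv as a plain map rather than the crate's KvReaderDelAdd:

    use std::collections::BTreeMap;

    // Field id -> (deleted value, added value); a simplified stand-in for the obkv.
    fn document_is_kept(obkv: &BTreeMap<u16, (Option<&str>, Option<&str>)>) -> bool {
        obkv.values().any(|(_del, add)| add.is_some())
    }

    fn main() {
        let mut deletion_only = BTreeMap::new();
        deletion_only.insert(0u16, (Some("old title"), None)); // every field is deletion-only
        assert!(!document_is_kept(&deletion_only));

        let mut updated = BTreeMap::new();
        updated.insert(0u16, (Some("old title"), Some("new title")));
        assert!(document_is_kept(&updated));
    }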
    let mut results = Vec::new();

fn to_vector_map(
    obkv: KvReaderDelAdd,
    side: DelAdd,
    document_id: &impl Fn() -> Value,
) -> Result<Option<serde_json::Map<String, Value>>> {
    Ok(if let Some(value) = obkv.get(side) {
        let Ok(value) = from_slice(value) else {
        let value = from_slice(value).map_err(InternalError::SerdeJson)?;
            return Err(crate::Error::UserError(UserError::InvalidVectorsMapType {
                document_id: document_id(),
                value,
            }));
        };
        Some(value)
    } else {
        None
    })
    for EmbedderVectorExtractor {
        embedder_name,
        embedder,
        prompt: _,
        manual_vectors_writer,
        prompts_writer,
        remove_vectors_writer,
    } in extractors
    {
        results.push(ExtractedVectorPoints {
            // docid, _index -> KvWriterDelAdd -> Vector
            manual_vectors: writer_into_reader(manual_vectors_writer)?,
            // docid -> ()
            remove_vectors: writer_into_reader(remove_vectors_writer)?,
            // docid -> prompt
            prompts: writer_into_reader(prompts_writer)?,

            embedder,
            embedder_name,
        })
    }

    Ok(results)
}
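The per-embedder delta computed above always collapses into one of a handful of states, and the rest of the pipeline only acts on that state. A simplified sketch of the variants named in this diff and how the (old user vectors?, new user vectors?, document kept?) inputs map onto them; the payload types are stand-ins, not the crate's definitions:

    #[allow(dead_code)]
    enum VectorStateDelta {
        NoChange,
        NowRemoved,
        NowGenerated(String),                      // new prompt to embed
        WasGeneratedNowManual(Vec<Vec<f32>>),      // user now supplies the vectors
        ManualDelta(Vec<Vec<f32>>, Vec<Vec<f32>>), // (deleted, added) user vectors
    }

    fn describe(old: bool, new: bool, kept: bool) -> &'static str {
        match (old, new, kept) {
            (true, true, _) => "ManualDelta: diff the two user-provided sets",
            (false, true, _) => "WasGeneratedNowManual: drop generated, keep user vectors",
            (true, false, true) => "NowGenerated: fall back to rendering the prompt",
            (false, false, true) => "NowGenerated or NoChange: re-render and compare prompts",
            (_, _, false) => "NowRemoved: drop every vector for this docid",
        }
    }

    fn main() {
        println!("{}", describe(true, false, true));
    }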
|
||||
/// Computes the diff between both Del and Add numbers and
@ -270,14 +291,13 @@ fn push_vectors_diff(
    manual_vectors_writer: &mut Writer<BufWriter<File>>,
    key_buffer: &mut Vec<u8>,
    delta: VectorStateDelta,
    settings_diff: &InnerIndexSettingsDiff,
    reindex_vectors: bool,
) -> Result<()> {
    puffin::profile_function!();
    let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
    if must_remove
        // TODO: the below condition works because we erase the vec database when an embedding setting changes.
        // When the vector pipeline is optimized, this should be removed.
        && !settings_diff.reindex_vectors()
        && !reindex_vectors
    {
        key_buffer.truncate(TRUNCATE_SIZE);
        remove_vectors_writer.insert(&key_buffer, [])?;
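TRUNCATE_SIZE here rewinds key_buffer to the docid prefix so the same allocation can be reused for every key derived from the current document. A small sketch of that buffer discipline, assuming a 4-byte docid prefix (the actual width is whatever the crate defines TRUNCATE_SIZE to be):

    const TRUNCATE_SIZE: usize = std::mem::size_of::<u32>();

    fn main() {
        let docid: u32 = 42;
        let mut key_buffer: Vec<u8> = Vec::new();
        key_buffer.extend_from_slice(&docid.to_be_bytes());

        for vector_index in 0u16..3 {
            key_buffer.truncate(TRUNCATE_SIZE); // keep only the docid prefix
            key_buffer.extend_from_slice(&vector_index.to_be_bytes());
            assert_eq!(key_buffer.len(), TRUNCATE_SIZE + 2);
        }
    }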
@ -308,7 +328,7 @@ fn push_vectors_diff(
        EitherOrBoth::Left(vector) => {
            // TODO: the below condition works because we erase the vec database when an embedding setting changes.
            // When the vector pipeline is optimized, this should be removed.
            if !settings_diff.reindex_vectors() {
            if !reindex_vectors {
                // We insert only the Del part of the Obkv to inform
                // that we only want to remove all those vectors.
                let mut obkv = KvWriterDelAdd::memory();
@ -336,26 +356,6 @@ fn compare_vectors(a: &[f32], b: &[f32]) -> Ordering {
    a.iter().copied().map(OrderedFloat).cmp(b.iter().copied().map(OrderedFloat))
}
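compare_vectors gives &[f32] a total lexicographic order by wrapping each float in OrderedFloat, which is what lets the caller sort candidate vectors and keep only the unique ones. A self-contained usage sketch, assuming the ordered-float crate; the sort-then-dedup step is an illustration, not the crate's exact call site:

    use ordered_float::OrderedFloat;
    use std::cmp::Ordering;

    fn compare_vectors(a: &[f32], b: &[f32]) -> Ordering {
        a.iter().copied().map(OrderedFloat).cmp(b.iter().copied().map(OrderedFloat))
    }

    fn main() {
        let mut vectors = vec![vec![0.3_f32, 0.1], vec![0.1, 0.2], vec![0.1, 0.2]];
        vectors.sort_by(|a, b| compare_vectors(a, b));
        vectors.dedup_by(|a, b| compare_vectors(a, b) == Ordering::Equal);
        assert_eq!(vectors, vec![vec![0.1, 0.2], vec![0.3, 0.1]]);
    }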
|
||||
/// Extracts the vectors from a JSON value.
fn extract_vectors(
    value: Value,
    document_id: impl Fn() -> Value,
    name: &str,
) -> Result<Vec<Vec<f32>>> {
    // FIXME: ugly clone of the vectors here
    match serde_json::from_value(value.clone()) {
        Ok(vectors) => {
            Ok(VectorOrArrayOfVectors::into_array_of_vectors(vectors).unwrap_or_default())
        }
        Err(_) => Err(UserError::InvalidVectorsType {
            document_id: document_id(),
            value,
            subfield: name.to_owned(),
        }
        .into()),
    }
}
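extract_vectors accepts either a single embedding or an array of embeddings for a subfield, which is what VectorOrArrayOfVectors normalizes. A hedged re-creation of that shape using serde's untagged enum (assuming serde with the derive feature and serde_json; this is not the crate's actual definition):

    use serde::Deserialize;

    #[derive(Deserialize)]
    #[serde(untagged)]
    enum VectorOrVectors {
        One(Vec<f32>),
        Many(Vec<Vec<f32>>),
    }

    impl VectorOrVectors {
        fn into_array_of_vectors(self) -> Vec<Vec<f32>> {
            match self {
                VectorOrVectors::One(v) => vec![v],
                VectorOrVectors::Many(vs) => vs,
            }
        }
    }

    fn main() {
        let single: VectorOrVectors = serde_json::from_str("[0.1, 0.2]").unwrap();
        let many: VectorOrVectors = serde_json::from_str("[[0.1, 0.2], [0.3, 0.4]]").unwrap();
        assert_eq!(single.into_array_of_vectors().len(), 1);
        assert_eq!(many.into_array_of_vectors().len(), 2);
    }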
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_embeddings<R: io::Read + io::Seek>(
    // docid, prompt
@ -364,7 +364,6 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
    embedder: Arc<Embedder>,
    request_threads: &ThreadPoolNoAbort,
) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();
    let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
    let n_vectors_per_chunk = embedder.prompt_count_in_chunk_hint(); // number of vectors in a single chunk
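chunk_count_hint and prompt_count_in_chunk_hint suggest a two-level batching scheme: prompts are grouped into fixed-size chunks, and several chunks are kept in flight at once so the embedder can be called on whole batches. An illustrative sketch of the grouping step only, with a hypothetical helper name:

    // Group prompts into chunks of at most n_vectors_per_chunk entries each.
    fn chunk_prompts(prompts: Vec<String>, n_vectors_per_chunk: usize) -> Vec<Vec<String>> {
        let mut chunks = Vec::new();
        let mut current = Vec::with_capacity(n_vectors_per_chunk);
        for prompt in prompts {
            current.push(prompt);
            if current.len() == n_vectors_per_chunk {
                chunks.push(std::mem::take(&mut current));
            }
        }
        if !current.is_empty() {
            chunks.push(current); // last, possibly partial, chunk
        }
        chunks
    }

    fn main() {
        let prompts: Vec<String> = (0..7).map(|i| format!("doc {i}")).collect();
        let chunks = chunk_prompts(prompts, 3);
        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[2].len(), 1);
    }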
@ -36,8 +36,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
    grenad::Reader<BufReader<File>>,
    grenad::Reader<BufReader<File>>,
)> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();

    let mut word_fid_docids_sorter = create_sorter(
@ -167,8 +165,6 @@ fn words_into_sorter(
    add_words: &BTreeSet<Vec<u8>>,
    word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
    puffin::profile_function!();

    use itertools::merge_join_by;
    use itertools::EitherOrBoth::{Both, Left, Right};
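words_into_sorter merges the deleted and added word sets with itertools' merge_join_by, classifying every word as deletion-only, addition-only, or present on both sides. A runnable sketch of that pattern over two sorted sets (the sets and strings here are illustrative):

    use itertools::merge_join_by;
    use itertools::EitherOrBoth::{Both, Left, Right};
    use std::collections::BTreeSet;

    fn main() {
        let del_words: BTreeSet<&str> = ["hello", "world"].into();
        let add_words: BTreeSet<&str> = ["hello", "rust"].into();

        // Both inputs iterate in sorted order, so one linear pass suffices.
        for eob in merge_join_by(&del_words, &add_words, |d, a| d.cmp(a)) {
            match eob {
                Left(w) => println!("{w}: only deleted"),
                Right(w) => println!("{w}: only added"),
                Both(w, _) => println!("{w}: present on both sides"),
            }
        }
    }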
@ -26,7 +26,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
    indexer: GrenadParameters,
    settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();
    let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
    let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
@ -71,8 +70,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(

        // if we change document, we fill the sorter
        if current_document_id.map_or(false, |id| id != document_id) {
            puffin::profile_scope!("Document into sorter");

            // FIXME: span inside of a hot loop might degrade performance and create big reports
            let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
            let _entered = span.enter();
@ -163,7 +160,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
    }

    if let Some(document_id) = current_document_id {
        puffin::profile_scope!("Final document into sorter");
        // FIXME: span inside of a hot loop might degrade performance and create big reports
        let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter");
        let _entered = span.enter();
@ -176,7 +172,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
        )?;
    }
    {
        puffin::profile_scope!("sorter_into_reader");
        // FIXME: span inside of a hot loop might degrade performance and create big reports
        let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader");
        let _entered = span.enter();

@ -25,8 +25,6 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
    indexer: GrenadParameters,
    _settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();

    let mut word_position_docids_sorter = create_sorter(
@ -104,8 +102,6 @@ fn words_position_into_sorter(
    add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
    word_position_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
    puffin::profile_function!();

    use itertools::merge_join_by;
    use itertools::EitherOrBoth::{Both, Left, Right};
Some files were not shown because too many files have changed in this diff.