Compare commits

..

1 Commits

Author SHA1 Message Date
680868ef77 Use weak ptr in panic handler 2024-06-19 12:22:46 +02:00
170 changed files with 2211 additions and 14413 deletions

View File

@ -43,11 +43,4 @@ jobs:
- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" \
--dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" \
--reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" \
-- ${{ steps.command.outputs.command-arguments }} > benchlinks.txt
- name: Send comment in PR
run: |
gh pr comment ${{github.event.issue.number}} --body-file benchlinks.txt
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" -- ${{ steps.command.outputs.command-arguments }}

View File

@ -116,7 +116,7 @@ jobs:
override: true
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -vqz lindera; then
echo "lindera has been found in the sources and it shouldn't"
exit 1
fi

View File

@ -187,8 +187,8 @@ They are JSON files with the following structure (comments are not actually supp
},
// Core of the workload.
// A list of commands to run sequentially.
// Optional: A precommand is a request to the Meilisearch instance that is executed before the profiling runs.
"precommands": [
// A command is a request to the Meilisearch instance that is executed while the profiling runs.
"commands": [
{
// Meilisearch route to call. `http://localhost:7700/` will be prepended.
"route": "indexes/movies/settings",
@ -224,11 +224,8 @@ They are JSON files with the following structure (comments are not actually supp
// - DontWait: run the next command without waiting the response to this one.
// - WaitForResponse: run the next command as soon as the response from the server is received.
// - WaitForTask: run the next command once **all** the Meilisearch tasks created up to now have finished processing.
"synchronous": "WaitForTask"
}
],
// A command is a request to the Meilisearch instance that is executed while the profiling runs.
"commands": [
"synchronous": "DontWait"
},
{
"route": "indexes/movies/documents",
"method": "POST",

731
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.9.0"
version = "1.8.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@ -25,7 +25,7 @@
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
<p align="center" name="demo">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
@ -39,8 +39,8 @@
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
## ✨ Features
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results
- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
@ -55,15 +55,15 @@
## 📖 Documentation
You can consult Meilisearch's documentation at [meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
## 🚀 Getting started
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
## 🌍 Supercharge your Meilisearch experience
## ⚡ Supercharge your Meilisearch experience
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Additional features include analytics & monitoring in many regions around the world. No credit card is required.
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
## 🧰 SDKs & integration tools
@ -85,13 +85,13 @@ Finally, for more in-depth information, refer to our articles explaining fundame
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.
## 📫 Get in touch!
Meilisearch is a search engine created by [Meili](https://www.meilisearch.com/careers), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.

View File

@ -197,140 +197,6 @@ pub(crate) mod test {
use super::*;
use crate::reader::v6::RuntimeTogglableFeatures;
// Opens the `v6-with-vectors` dump asset and checks, in order: its top-level
// metadata, its tasks and their update files, the metadata and documents of its
// two indexes, and finally its runtime-togglable features.
#[test]
fn import_dump_v6_with_vectors() {
// dump containing two indexes
//
// "vector", configured with an embedder
// contains:
// - one document with an overridden vector,
// - one document with a natural vector
// - one document with a _vectors map containing one additional embedder name and a natural vector
// - one document with a _vectors map containing one additional embedder name and an overridden vector
//
// "novector", no embedder
// contains:
// - a document without vector
// - a document with a random _vectors field
let dump = File::open("tests/assets/v6-with-vectors.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
// split the (task, update file) pairs into two parallel vectors
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"278f63325ef06ca04d01df98d8207b94");
// ten tasks are expected, and none of them comes with an update file
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_none());
assert!(update_files[2].is_none());
assert!(update_files[3].is_none());
assert!(update_files[4].is_none());
assert!(update_files[5].is_none());
assert!(update_files[6].is_none());
assert!(update_files[7].is_none());
assert!(update_files[8].is_none());
assert!(update_files[9].is_none());
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
// sorting by uid makes the pops below deterministic: "vector" sorts last and is
// popped first, then "novector"
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut vector_index = indexes.pop().unwrap();
let mut novector_index = indexes.pop().unwrap();
// the dump must not contain any other index
assert!(indexes.is_empty());
// vector
insta::assert_json_snapshot!(vector_index.metadata(), @r###"
{
"uid": "vector",
"primaryKey": "id",
"createdAt": "2024-05-16T15:33:17.240962Z",
"updatedAt": "2024-05-16T15:40:55.723052Z"
}
"###);
{
let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
let mut documents = documents.unwrap();
assert_eq!(documents.len(), 4);
// sort by the stringified "id" field; the pops below then take the documents
// from the last id to the first
documents.sort_by_key(|doc| doc.get("id").unwrap().to_string());
// NOTE(review): the four snapshots below carry no inline reference value, so the
// expected values live outside this function (presumably in insta's snapshot
// files, matched by call order) - keep the order of these assertions unchanged.
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
}
// novector
insta::assert_json_snapshot!(novector_index.metadata(), @r###"
{
"uid": "novector",
"primaryKey": "id",
"createdAt": "2024-05-16T15:33:03.568055Z",
"updatedAt": "2024-05-16T15:33:07.530217Z"
}
"###);
// no embedder is configured on this index
insta::assert_json_snapshot!(novector_index.settings().unwrap().embedders, @"null");
{
let documents: Result<Vec<_>> = novector_index.documents().unwrap().collect();
let mut documents = documents.unwrap();
assert_eq!(documents.len(), 2);
// same ordering trick as above: "e1" is popped first, then "e0"
documents.sort_by_key(|doc| doc.get("id").unwrap().to_string());
{
let document = documents.pop().unwrap();
// the `_vectors` field of this document is a plain string and is expected as-is
insta::assert_json_snapshot!(document, @r###"
{
"id": "e1",
"other": "random1",
"_vectors": "toto"
}
"###);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document, @r###"
{
"id": "e0",
"other": "random0"
}
"###);
}
}
// the dump must report the `vector_store` feature as enabled, everything else at its default
assert_eq!(
dump.features().unwrap().unwrap(),
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
);
}
#[test]
fn import_dump_v6_experimental() {
let dump = File::open("tests/assets/v6-with-experimental.dump").unwrap();

View File

@ -1,783 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e3",
"desc": "overriden vector + map",
"_vectors": {
"default": [
0.2,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1
],
"toto": [
0.1
]
}
}

View File

@ -1,786 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e2",
"desc": "natural vector + map",
"_vectors": {
"toto": [],
"default": {
"embeddings": [
[
-0.05189208313822746,
-0.9273212552070618,
0.1443813145160675,
0.0932632014155388,
0.2665371894836426,
0.36266782879829407,
0.6402910947799683,
0.32014018297195435,
0.030915971845388412,
-0.9312191605567932,
-0.3718109726905823,
-0.2700554132461548,
-1.1014580726623535,
0.9154956936836244,
-0.3406888246536255,
1.0077725648880005,
0.6577560901641846,
-0.3955195546150207,
-0.4148270785808563,
0.1855088472366333,
0.5062315464019775,
-0.3632686734199524,
-0.2277890294790268,
0.2560805082321167,
-0.3853609561920166,
-0.1604762226343155,
-0.13947471976280212,
-0.20147813856601715,
-0.4466346800327301,
-0.3761846721172333,
0.1443382054567337,
0.18205296993255615,
0.49359792470932007,
-0.22538000345230105,
-0.4996317625045776,
-0.22734887897968292,
-0.6034309267997742,
-0.7857939600944519,
-0.34923747181892395,
-0.3466345965862274,
0.21176661550998688,
-0.5101462006568909,
-0.3403083384037018,
0.000315118464641273,
0.236465722322464,
-0.10246097296476364,
-1.3013339042663574,
0.3419138789176941,
-0.32963496446609497,
-0.0901619717478752,
-0.5426247119903564,
0.22656650841236117,
-0.44758284091949463,
0.14151698350906372,
-0.1089438870549202,
0.5500766634941101,
-0.670711100101471,
-0.6227269768714905,
0.3894464075565338,
-0.27609574794769287,
0.7028202414512634,
-0.19697771966457367,
0.328511506319046,
0.5063360929489136,
0.4065195322036743,
0.2614171802997589,
-0.30274391174316406,
1.0393824577331543,
-0.7742937207221985,
-0.7874112129211426,
-0.6749666929244995,
0.5190866589546204,
0.004123548045754433,
-0.28312963247299194,
-0.038731709122657776,
-1.0142987966537476,
-0.09519586712121964,
0.8755272626876831,
0.4876938760280609,
0.7811151742935181,
0.85174959897995,
0.11826585978269576,
0.5373436808586121,
0.3649002015590668,
0.19064077734947205,
-0.00287026260048151,
-0.7305403351783752,
-0.015206154435873032,
-0.7899249196052551,
0.19407285749912265,
0.08596625179052353,
-0.28976231813430786,
-0.1525907665491104,
0.3798313438892365,
0.050306469202041626,
-0.5697937607765198,
0.4219021201133728,
0.276252806186676,
0.1559903472661972,
0.10030482709407806,
-0.4043720066547394,
-0.1969818025827408,
0.5739826560020447,
0.2116064727306366,
-1.4620544910430908,
-0.7802462577819824,
-0.24739810824394223,
-0.09791352599859238,
-0.4413802027702331,
0.21549351513385773,
-0.9520436525344848,
-0.08762510865926743,
0.08154498040676117,
-0.6154940724372864,
-1.01079523563385,
0.885427713394165,
0.6967288851737976,
0.27186504006385803,
-0.43194177746772766,
-0.11248451471328735,
0.7576630711555481,
0.4998855590820313,
0.0264343973249197,
0.9872855544090272,
0.5634694695472717,
0.053698331117630005,
0.19410227239131927,
0.3570743501186371,
-0.23670297861099243,
-0.9114483594894408,
0.07884842902421951,
0.7318344116210938,
0.44630110263824463,
0.08745364099740982,
-0.347101628780365,
-0.4314247667789459,
-0.5060274004936218,
0.003706763498485088,
0.44320008158683777,
-0.00788921769708395,
-0.1368623524904251,
-0.17391923069953918,
0.14473655819892883,
0.10927865654230118,
0.6974599361419678,
0.005052129738032818,
-0.016953065991401672,
-0.1256176233291626,
-0.036742497235536575,
0.5591985583305359,
-0.37619709968566895,
0.22429119050502777,
0.5403043031692505,
-0.8603790998458862,
-0.3456307053565979,
0.9292937517166138,
0.5074859261512756,
0.6310645937919617,
-0.3091641068458557,
0.46902573108673096,
0.7891915440559387,
0.4499550759792328,
0.2744995653629303,
0.2712305784225464,
-0.04349074140191078,
-0.3638863265514374,
0.7839881777763367,
0.7352104783058167,
-0.19457511603832245,
-0.5957832932472229,
-0.43704694509506226,
-1.084769368171692,
0.4904985725879669,
0.5385226011276245,
0.1891629993915558,
0.12338479608297348,
0.8315675258636475,
-0.07830192148685455,
1.0916285514831543,
-0.28066861629486084,
-1.3585069179534912,
0.5203898549079895,
0.08678033947944641,
-0.2566044330596924,
0.09484415501356123,
-0.0180208683013916,
1.0264745950698853,
-0.023572135716676712,
0.5864979028701782,
0.7625196576118469,
-0.2543414533138275,
-0.8877770900726318,
0.7611982822418213,
-0.06220436468720436,
0.937336564064026,
0.2704363465309143,
-0.37733694911003113,
0.5076137781143188,
-0.30641937255859375,
0.6252772808074951,
-0.0823579877614975,
-0.03736555948853493,
0.4131673276424408,
-0.6514252424240112,
0.12918265163898468,
-0.4483584463596344,
0.6750786304473877,
-0.37008383870124817,
-0.02324833907186985,
0.38027650117874146,
-0.26374951004981995,
0.4346931278705597,
0.42882832884788513,
-0.48798441886901855,
1.1882442235946655,
0.5132288336753845,
0.5284568667411804,
-0.03538886830210686,
0.29620853066444397,
-1.0683696269989014,
0.25936177372932434,
0.10404160618782043,
-0.25796034932136536,
0.027896970510482788,
-0.09225251525640488,
1.4811025857925415,
0.641173779964447,
-0.13838383555412292,
-0.3437179923057556,
0.5667019486427307,
-0.5400741696357727,
0.31090837717056274,
0.6470608115196228,
-0.3747067153453827,
-0.7364534735679626,
-0.07431528717279434,
0.5173454880714417,
-0.6578747034072876,
0.7107478976249695,
-0.7918999791145325,
-0.0648345872759819,
0.609937846660614,
-0.7329513430595398,
0.9741371870040894,
0.17912346124649048,
-0.02658769302070141,
0.5162150859832764,
-0.3978803157806397,
-0.7833885550498962,
-0.6497276425361633,
-0.3898126780986786,
-0.0952848568558693,
0.2663288116455078,
-0.1604052186012268,
0.373076468706131,
-0.8357769250869751,
-0.05217683315277099,
-0.2680160701274872,
0.8389158248901367,
0.6833611130714417,
-0.6712407469749451,
0.7406917214393616,
-0.44522786140441895,
-0.34645363688468933,
-0.27384576201438904,
-0.9878405928611756,
-0.8166060447692871,
0.06268279999494553,
0.38567957282066345,
-0.3274703919887543,
0.5296315550804138,
-0.11810623109340668,
0.23029841482639313,
0.08616159111261368,
-0.2195747196674347,
0.09430307894945145,
0.4057176411151886,
0.4892159104347229,
-0.1636916548013687,
-0.6071445345878601,
0.41256585717201233,
0.622254490852356,
-0.41223976016044617,
-0.6686707139015198,
-0.7474371790885925,
-0.8509522080421448,
-0.16754287481307983,
-0.9078601002693176,
-0.29653599858283997,
-0.5020652413368225,
0.4692700505256653,
0.01281109917908907,
-0.16071580350399017,
0.03388889133930206,
-0.020511148497462273,
0.5027827024459839,
-0.20729811489582065,
0.48107290267944336,
0.33669769763946533,
-0.5275911688804626,
0.48271527886390686,
0.2738940715789795,
-0.033152539283037186,
-0.13629786670207977,
-0.05965912342071533,
-0.26200807094573975,
0.04002794995903969,
-0.34095603227615356,
-3.986898899078369,
-0.46819332242012024,
-0.422744482755661,
-0.169097900390625,
0.6008929014205933,
0.058016058057546616,
-0.11401277780532836,
-0.3077819049358368,
-0.09595538675785063,
0.6723822355270386,
0.19367831945419312,
0.28304359316825867,
0.1609862744808197,
0.7567598819732666,
0.6889985799789429,
0.06907720118761063,
-0.04188092052936554,
-0.7434936165809631,
0.13321782648563385,
0.8456063270568848,
-0.10364038497209548,
-0.45084846019744873,
-0.4758241474628449,
0.43882066011428833,
-0.6432598829269409,
0.7217311859130859,
-0.24189773201942444,
0.12737572193145752,
-1.1008601188659668,
-0.3305315673351288,
0.14614742994308472,
-0.7819333076477051,
0.5287120342254639,
-0.055538054555654526,
0.1877404749393463,
-0.6907662153244019,
0.5616975426673889,
-0.4611121714115143,
-0.26109233498573303,
-0.12898315489292145,
-0.3724522292613983,
-0.7191406488418579,
-0.4425233602523804,
-0.644108235836029,
0.8424481153488159,
0.17532426118850708,
-0.5121750235557556,
-0.6467239260673523,
-0.0008507720194756985,
0.7866212129592896,
-0.02644744887948036,
-0.005045140627771616,
0.015782782807946205,
0.16334445774555206,
-0.1913367658853531,
-0.13697923719882965,
-0.6684983372688293,
0.18346354365348816,
-0.341105580329895,
0.5427411198616028,
0.3779832422733307,
-0.6778115034103394,
-0.2931850254535675,
-0.8805161714553833,
-0.4212774932384491,
-0.5368952751159668,
-1.3937891721725464,
-1.225494146347046,
0.4276703894138336,
1.1205668449401855,
-0.6005299687385559,
0.15732505917549133,
-0.3914784789085388,
-1.357046604156494,
-0.4707142114639282,
-0.1497287154197693,
-0.25035548210144043,
-0.34328439831733704,
0.39083412289619446,
0.1623048633337021,
-0.9275814294815063,
-0.6430015563964844,
0.2973862886428833,
0.5580436587333679,
-0.6232585310935974,
-0.6611042022705078,
0.4015969038009643,
-1.0232892036437988,
-0.2585645020008087,
-0.5431421399116516,
0.5021264553070068,
-0.48601630330085754,
-0.010242084041237833,
0.5862035155296326,
0.7316920161247253,
0.4036808013916016,
0.4269520044326782,
-0.705938458442688,
0.7747307419776917,
0.10164368897676468,
0.7887958884239197,
-0.9612497091293336,
0.12755516171455383,
0.06812842190265656,
-0.022603651508688927,
0.14722754061222076,
-0.5588505268096924,
-0.20689940452575684,
0.3557641804218292,
-0.6812759637832642,
0.2860803008079529,
-0.38954633474349976,
0.1759403496980667,
-0.5678874850273132,
-0.1692986786365509,
-0.14578519761562347,
0.5711379051208496,
1.0208125114440918,
0.7759483456611633,
-0.372348427772522,
-0.5460885763168335,
0.7190321683883667,
-0.6914990544319153,
0.13365162909030914,
-0.4854792356491089,
0.4054908752441406,
0.4502798914909363,
-0.3041122555732727,
-0.06726965308189392,
-0.05570871382951737,
-0.0455719493329525,
0.4785125255584717,
0.8867972493171692,
0.4107886850833893,
0.6121342182159424,
-0.20477132499217987,
-0.5598517656326294,
-0.6443566679954529,
-0.5905212759971619,
-0.5571200251579285,
0.17573799192905426,
-0.28621870279312134,
0.1685224026441574,
0.09719007462263109,
-0.04223639518022537,
-0.28623101115226746,
-0.1449810117483139,
-0.3789580464363098,
-0.5227636098861694,
-0.049728814512491226,
0.7849089503288269,
0.16792525351047516,
0.9849340915679932,
-0.6559549570083618,
0.35723909735679626,
-0.6822739243507385,
1.2873116731643677,
0.19993330538272855,
0.03512010723352432,
-0.6972134113311768,
0.18453484773635864,
-0.2437680810689926,
0.2156416028738022,
0.5230382680892944,
0.22020135819911957,
0.8314080238342285,
0.15627102553844452,
-0.7330264449119568,
0.3888184726238251,
-0.22034703195095065,
0.5457669496536255,
-0.48084837198257446,
-0.45576658844947815,
-0.09287727624177931,
-0.06968110054731369,
0.35125672817230225,
-0.4278119504451752,
0.2038476765155792,
0.11392722278833388,
0.9433983564376832,
-0.4097744226455689,
0.035297419875860214,
-0.4274404048919678,
-0.25100165605545044,
1.0943366289138794,
-0.07634022831916809,
-0.2925529479980469,
-0.7512530088424683,
0.2649727463722229,
-0.4078235328197479,
-0.3372223973274231,
0.05190162733197212,
0.005654910113662481,
-0.0001571219472680241,
-0.35445958375930786,
-0.7837416529655457,
0.1500556766986847,
0.4383024573326111,
0.6099548935890198,
0.05951934307813645,
-0.21325334906578064,
0.0199207104742527,
-0.22704418003559113,
-0.6481077671051025,
0.37442275881767273,
-1.015955924987793,
0.38637226819992065,
-0.06489371508359909,
-0.494120329618454,
0.3469836115837097,
0.15402406454086304,
-0.7660972476005554,
-0.7053225040435791,
-0.25964751839637756,
0.014004424214363098,
-0.2860170006752014,
-0.17565494775772095,
-0.45117494463920593,
-0.0031954257283359766,
0.09676837921142578,
-0.514464259147644,
0.41698193550109863,
-0.21642713248729703,
-0.5398141145706177,
-0.3647628426551819,
0.37005379796028137,
0.239425927400589,
-0.08833975344896317,
0.934946596622467,
-0.48340797424316406,
0.6241437792778015,
-0.7253676652908325,
-0.04303571209311485,
1.1125205755233765,
-0.15692919492721558,
-0.2914651036262512,
-0.5117168426513672,
0.21365483105182648,
0.4924402534961701,
0.5269662141799927,
0.0352792888879776,
-0.149167999625206,
-0.6019760370254517,
0.08245442807674408,
0.4900692105293274,
0.518824577331543,
-0.00005570516441366635,
-0.553304135799408,
0.22217543423175812,
0.5047767758369446,
0.135724738240242,
1.1511540412902832,
-0.3541218340396881,
-0.9712511897087096,
0.8353699445724487,
-0.39227569103240967,
-0.9117669463157654,
-0.26349931955337524,
0.05597023293375969,
0.20695461332798004,
0.3178807199001312,
1.0663238763809204,
0.5062212347984314,
0.7288597822189331,
0.09899299591779707,
0.553720235824585,
0.675009548664093,
-0.20067055523395536,
0.3138423264026642,
-0.6886593103408813,
-0.2910398542881012,
-1.3186300992965698,
-0.4684459865093231,
-0.095743365585804,
-0.1257995069026947,
-0.4858281314373016,
-0.4935407340526581,
-0.3266896903514862,
-0.3928797245025635,
-0.40803104639053345,
-0.9975396394729614,
0.4229583740234375,
0.37309643626213074,
0.4431034922599793,
0.30364808440208435,
-0.3765178918838501,
0.5616499185562134,
0.16904796659946442,
-0.7343707084655762,
0.2560209631919861,
0.6166825294494629,
0.3200829327106476,
-0.4483652710914612,
0.16224201023578644,
-0.31495288014411926,
-0.42713335156440735,
0.7270734906196594,
0.7049484848976135,
-0.0571461021900177,
0.04477125033736229,
-0.6647796034812927,
1.183672308921814,
0.36199676990509033,
0.046881116926670074,
0.4515796303749085,
0.9278061985969543,
0.31471705436706543,
-0.7073333859443665,
-0.3443860113620758,
0.5440067052841187,
-0.15020819008350372,
-0.541202962398529,
0.5203295946121216,
1.2192286252975464,
-0.9983593225479126,
-0.18758884072303772,
0.2758221924304962,
-0.6511523723602295,
-0.1584404855966568,
-0.236241415143013,
0.2692437767982483,
-0.4941152036190033,
0.4987454116344452,
-0.3331359028816223,
0.3163745701313019,
0.745529294013977,
-0.2905873656272888,
0.13602906465530396,
0.4679684340953827,
1.0555986166000366,
1.075700044631958,
0.5368486046791077,
-0.5118206739425659,
0.8668332099914551,
-0.5726966857910156,
-0.7811751961708069,
0.1938626915216446,
-0.1929349899291992,
0.1757766306400299,
0.6384295225143433,
0.26462844014167786,
0.9542630314826964,
0.19313029944896695,
1.264248013496399,
-0.6304428577423096,
0.0487106591463089,
-0.16211535036563873,
-0.7894763350486755,
0.3582514822483063,
-0.04153040423989296,
0.635784387588501,
0.6554391980171204,
-0.47010496258735657,
-0.8302040696144104,
-0.1350124627351761,
0.2568812072277069,
0.13614831864833832,
-0.2563649117946625,
-1.0434694290161133,
0.3232482671737671,
0.47882452607154846,
0.4298652410507202,
1.0563770532608032,
-0.28917592763900757,
-0.8533256649971008,
0.10648339986801147,
0.6376127004623413,
-0.20832888782024384,
0.2370245456695557,
0.0018312990432605147,
-0.2034837007522583,
0.01051164511591196,
-1.105310082435608,
0.29724350571632385,
0.15604574978351593,
0.1973688006401062,
0.44394731521606445,
0.3974513411521912,
-0.13625948131084442,
0.9571986198425292,
0.2257384955883026,
0.2323588728904724,
-0.5583669543266296,
-0.7854922413825989,
0.1647188365459442,
-1.6098142862319946,
0.318587988615036,
-0.13399995863437653,
-0.2172701060771942,
-0.767514705657959,
-0.5813586711883545,
-0.3195130527019501,
-0.04894036799669266,
0.2929930090904236,
-0.8213384747505188,
0.07181350141763687,
0.7469993829727173,
0.6407455801963806,
0.16365697979927063,
0.7870153188705444,
0.6524736881256104,
0.6399973630905151,
-0.04992736503481865,
-0.03959266096353531,
-0.2512352466583252,
0.8448855876922607,
-0.1422702670097351,
0.1216789186000824,
-1.2647287845611572,
0.5931149125099182,
0.7186052203178406,
-0.06118432432413101,
-1.1942816972732544,
-0.17677085101604462,
0.31543800234794617,
-0.32252824306488037,
0.8255583047866821,
-0.14529970288276672,
-0.2695446312427521,
-0.33378756046295166,
-0.1653425395488739,
0.1454019844532013,
-0.3920115828514099,
0.912214994430542,
-0.7279734015464783,
0.7374742031097412,
0.933980405330658,
0.13429680466651917,
-0.514870285987854,
0.3989711999893189,
-0.11613689363002776,
0.4022413492202759,
-0.9990655779838562,
-0.33749932050704956,
-0.4334589838981629,
-1.376373291015625,
-0.2993924915790558,
-0.09454808384180068,
-0.01314175222069025,
-0.001090060803107917,
0.2137461006641388,
0.2938512861728668,
0.17508235573768616,
0.8260607123374939,
-0.7218498587608337,
0.2414487451314926,
-0.47296759486198425,
-0.3002610504627228,
-1.238540768623352,
0.08663805574178696,
0.6805586218833923,
0.5909030437469482,
-0.42807504534721375,
-0.22887496650218964,
0.47537800669670105,
-1.0474627017974854,
0.6338009238243103,
0.06548397243022919,
0.4971011281013489,
1.3484878540039063
]
],
"regenerate": true
}
}
}

View File

@ -1,785 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e1",
"desc": "natural vector",
"_vectors": {
"default": {
"embeddings": [
[
-0.2979458272457123,
-0.5288640856742859,
-0.019957859069108963,
-0.18495318293571472,
0.7429973483085632,
0.5238497257232666,
0.432366281747818,
0.32744166254997253,
0.0020762972999364138,
-0.9507834911346436,
-0.35097137093544006,
0.08469701558351517,
-1.4176613092422483,
0.4647577106952667,
-0.69340580701828,
1.0372896194458008,
0.3716741800308227,
0.06031008064746857,
-0.6152024269104004,
0.007914665155112743,
0.7954924702644348,
-0.20773003995418549,
0.09376765787601472,
0.04508133605122566,
-0.2084471583366394,
-0.1518009901046753,
0.018195509910583496,
-0.07044368237257004,
-0.18119366466999057,
-0.4480230510234833,
0.3822529911994934,
0.1911812424659729,
0.4674372375011444,
0.06963984668254852,
-0.09341949224472046,
0.005675444379448891,
-0.6774799227714539,
-0.7066726684570313,
-0.39256376028060913,
0.04005039855837822,
0.2084812968969345,
-0.7872875928878784,
-0.8205880522727966,
0.2919981777667999,
-0.06004738807678223,
-0.4907574355602264,
-1.5937862396240234,
0.24249385297298431,
-0.14709846675395966,
-0.11860740929841997,
-0.8299489617347717,
0.472964346408844,
-0.497518390417099,
-0.22205302119255063,
-0.4196169078350067,
0.32697558403015137,
-0.360930860042572,
-0.9789686799049376,
0.1887447088956833,
-0.403737336397171,
0.18524253368377688,
0.3768732249736786,
0.3666233420372009,
0.3511938452720642,
0.6985810995101929,
0.41721710562705994,
0.09754953533411026,
0.6204307079315186,
-1.0762996673583984,
-0.06263761967420578,
-0.7376511693000793,
0.6849768161773682,
-0.1745152473449707,
-0.40449759364128113,
0.20757411420345304,
-0.8424443006515503,
0.330015629529953,
0.3489064872264862,
1.0954371690750122,
0.8487558960914612,
1.1076823472976685,
0.61430823802948,
0.4155903458595276,
0.4111340939998626,
0.05753209814429283,
-0.06429877132177353,
-0.765606164932251,
-0.41703930497169495,
-0.508820652961731,
0.19859947264194489,
-0.16607828438282013,
-0.28112146258354187,
0.11032675206661224,
0.38809511065483093,
-0.36498191952705383,
-0.48671194911003113,
0.6755134463310242,
0.03958442434668541,
0.4478721618652344,
-0.10335399955511092,
-0.9546685814857484,
-0.6087718605995178,
0.17498846352100372,
0.08320838958024979,
-1.4478336572647097,
-0.605027437210083,
-0.5867993235588074,
-0.14711688458919525,
-0.5447602272033691,
-0.026259321719408035,
-0.6997418403625488,
-0.07349082082509995,
0.10638900846242905,
-0.7133527398109436,
-0.9396815299987792,
1.087092399597168,
1.1885089874267578,
0.4011896848678589,
-0.4089202582836151,
-0.10938972979784012,
0.6726722121238708,
0.24576938152313232,
-0.24247920513153076,
1.1499971151351929,
0.47813335061073303,
-0.05331678315997124,
0.32338133454322815,
0.4870913326740265,
-0.23144258558750153,
-1.2023426294326782,
0.2349330335855484,
1.080536961555481,
0.29334118962287903,
0.391574501991272,
-0.15818795561790466,
-0.2948290705680847,
-0.024689948186278343,
0.06602869182825089,
0.5937030911445618,
-0.047901444137096405,
-0.512734591960907,
-0.35780075192451477,
0.28751692175865173,
0.4298716187477112,
0.9242428541183472,
-0.17208744585514069,
0.11515070497989656,
-0.0335976779460907,
-0.3422986567020416,
0.5344581604003906,
0.19895796477794647,
0.33001241087913513,
0.6390730142593384,
-0.6074934005737305,
-0.2553696632385254,
0.9644920229911804,
0.2699219584465027,
0.6403993368148804,
-0.6380003690719604,
-0.027310986071825027,
0.638815701007843,
0.27719101309776306,
-0.13553589582443237,
0.750195324420929,
0.1224869191646576,
-0.20613941550254825,
0.8444448709487915,
0.16200250387191772,
-0.24750925600528717,
-0.739950954914093,
-0.28443849086761475,
-1.176282525062561,
0.516107976436615,
0.3774825632572174,
0.10906043648719788,
0.07962015271186829,
0.7384604215621948,
-0.051241904497146606,
1.1730090379714966,
-0.4828610122203827,
-1.404372215270996,
0.8811132311820984,
-0.3839482367038727,
0.022516896948218346,
-0.0491158664226532,
-0.43027013540267944,
1.2049334049224854,
-0.27309560775756836,
0.6883630752563477,
0.8264574408531189,
-0.5020735263824463,
-0.4874092042446137,
0.6007202863693237,
-0.4965405762195587,
1.1302915811538696,
0.032572727650403976,
-0.3731859028339386,
0.658271849155426,
-0.9023059010505676,
0.7400162220001221,
0.014550759457051754,
-0.19699542224407196,
0.2319706380367279,
-0.789058268070221,
-0.14905710518360138,
-0.5826214551925659,
0.207652747631073,
-0.4507439732551574,
-0.3163885474205017,
0.3604124188423157,
-0.45119962096214294,
0.3428427278995514,
0.3005594313144684,
-0.36026081442832947,
1.1014249324798584,
0.40884315967559814,
0.34991952776908875,
-0.1806638240814209,
0.27440476417541504,
-0.7118373513221741,
0.4645499587059021,
0.214790478348732,
-0.2343102991580963,
0.10500429570674896,
-0.28034430742263794,
1.2267805337905884,
1.0561333894729614,
-0.497364342212677,
-0.6143305897712708,
0.24963727593421936,
-0.33136463165283203,
-0.01473914459347725,
0.495918869972229,
-0.6985538005828857,
-1.0033197402954102,
0.35937801003456116,
0.6325868368148804,
-0.6808838844299316,
1.0354058742523191,
-0.7214401960372925,
-0.33318862318992615,
0.874398410320282,
-0.6594992280006409,
0.6830640435218811,
-0.18534131348133087,
0.024834271520376205,
0.19901277124881744,
-0.5992477536201477,
-1.2126628160476685,
-0.9245557188987732,
-0.3898217976093292,
-0.1286519467830658,
0.4217943847179413,
-0.1143646091222763,
0.5630772709846497,
-0.5240639448165894,
0.21152715384960177,
-0.3792001008987427,
0.8266305327415466,
1.170984387397766,
-0.8072142004966736,
0.11382893472909927,
-0.17953898012638092,
-0.1789460331201553,
-0.15078622102737427,
-1.2082908153533936,
-0.7812382578849792,
-0.10903695970773696,
0.7303897142410278,
-0.39054441452026367,
0.19511254131793976,
-0.09121843427419662,
0.22400228679180145,
0.30143046379089355,
0.1141919493675232,
0.48112115263938904,
0.7307931780815125,
0.09701362252235413,
-0.2795647978782654,
-0.3997688889503479,
0.5540812611579895,
0.564578115940094,
-0.40065160393714905,
-0.3629159033298493,
-0.3789091110229492,
-0.7298538088798523,
-0.6996853351593018,
-0.4477842152118683,
-0.289089560508728,
-0.6430277824401855,
0.2344944179058075,
0.3742927014827728,
-0.5079357028007507,
0.28841453790664673,
0.06515737622976303,
0.707315981388092,
0.09498685598373412,
0.8365515470504761,
0.10002726316452026,
-0.7695478200912476,
0.6264724135398865,
0.7562043070793152,
-0.23112858831882477,
-0.2871039807796478,
-0.25010058283805847,
0.2783474028110504,
-0.03224996477365494,
-0.9119359850883484,
-3.6940200328826904,
-0.5099936127662659,
-0.1604711413383484,
0.17453284561634064,
0.41759559512138367,
0.1419190913438797,
-0.11362407356500626,
-0.33312007784843445,
0.11511333286762238,
0.4667884409427643,
-0.0031647447030991316,
0.15879854559898376,
0.3042248487472534,
0.5404849052429199,
0.8515422344207764,
0.06286454200744629,
0.43790125846862793,
-0.8682025074958801,
-0.06363756954669952,
0.5547921657562256,
-0.01483887154608965,
-0.07361344993114471,
-0.929947018623352,
0.3502565622329712,
-0.5080993175506592,
1.0380364656448364,
-0.2017953395843506,
0.21319580078125,
-1.0763001441955566,
-0.556368887424469,
0.1949922740459442,
-0.6445739269256592,
0.6791343688964844,
0.21188358962535855,
0.3736183941364288,
-0.21800459921360016,
0.7597446441650391,
-0.3732394874095917,
-0.4710160195827484,
0.025146087631583217,
0.05341297015547752,
-0.9522109627723694,
-0.6000866889953613,
-0.08469046652317047,
0.5966026186943054,
0.3444081246852875,
-0.461188405752182,
-0.5279349088668823,
0.10296865552663804,
0.5175143480300903,
-0.20671147108078003,
0.13392412662506104,
0.4812754988670349,
0.2993808686733246,
-0.3005635440349579,
0.5141698122024536,
-0.6239235401153564,
0.2877119481563568,
-0.4452739953994751,
0.5621107816696167,
0.5047508478164673,
-0.4226335883140564,
-0.18578553199768064,
-1.1967322826385498,
0.28178197145462036,
-0.8692031502723694,
-1.1812998056411743,
-1.4526212215423584,
0.4645712077617645,
0.9327932000160216,
-0.6560136675834656,
0.461549699306488,
-0.5621527433395386,
-1.328449010848999,
-0.08676894754171371,
0.00021918353741057217,
-0.18864136934280396,
0.1259666532278061,
0.18240638077259064,
-0.14919660985469818,
-0.8965857625007629,
-0.7539900541305542,
0.013973715715110302,
0.504276692867279,
-0.704748272895813,
-0.6428424119949341,
0.6303996443748474,
-0.5404738187789917,
-0.31176653504371643,
-0.21262824535369873,
0.18736739456653595,
-0.7998970746994019,
0.039946746081113815,
0.7390344738960266,
0.4283199906349182,
0.3795057237148285,
0.07204607129096985,
-0.9230587482452391,
0.9440426230430604,
0.26272690296173096,
0.5598306655883789,
-1.0520871877670288,
-0.2677186131477356,
-0.1888762265443802,
0.30426350235939026,
0.4746131896972656,
-0.5746733546257019,
-0.4197768568992615,
0.8565112948417664,
-0.6767723560333252,
0.23448683321475983,
-0.2010004222393036,
0.4112907350063324,
-0.6497949957847595,
-0.418667733669281,
-0.4950824975967407,
0.44438859820365906,
1.026281714439392,
0.482397586107254,
-0.26220494508743286,
-0.3640787005424499,
0.5907743573188782,
-0.8771642446517944,
0.09708411991596222,
-0.3671700060367584,
0.4331349730491638,
0.619417667388916,
-0.2684665620326996,
-0.5123821496963501,
-0.1502324342727661,
-0.012190685607492924,
0.3580845892429352,
0.8617186546325684,
0.3493645489215851,
1.0270192623138428,
0.18297909200191495,
-0.5881339311599731,
-0.1733516901731491,
-0.5040576457977295,
-0.340370237827301,
-0.26767754554748535,
-0.28570041060447693,
-0.032928116619586945,
0.6029254794120789,
0.17397655546665192,
0.09346921741962431,
0.27815181016921997,
-0.46699589490890503,
-0.8148876428604126,
-0.3964351713657379,
0.3812595009803772,
0.13547226786613464,
0.7126688361167908,
-0.3473474085330963,
-0.06573959439992905,
-0.6483767032623291,
1.4808889627456665,
0.30924928188323975,
-0.5085946917533875,
-0.8613000512123108,
0.3048902451992035,
-0.4241599142551422,
0.15909206867218018,
0.5764641761779785,
-0.07879110425710678,
1.015336513519287,
0.07599356025457382,
-0.7025855779647827,
0.30047643184661865,
-0.35094937682151794,
0.2522146999835968,
-0.2338722199201584,
-0.8326804637908936,
-0.13695412874221802,
-0.03452421352267265,
0.47974953055381775,
-0.18385636806488037,
0.32438594102859497,
0.1797013282775879,
0.787494957447052,
-0.12579888105392456,
-0.07507286965847015,
-0.4389670491218567,
0.2720070779323578,
0.8138866424560547,
0.01974171027541161,
-0.3057698905467987,
-0.6709924936294556,
0.0885881632566452,
-0.2862754464149475,
0.03475658595561981,
-0.1285519152879715,
0.3838353455066681,
-0.2944154739379883,
-0.4204859137535095,
-0.4416137933731079,
0.13426260650157928,
0.36733248829841614,
0.573428750038147,
-0.14928072690963745,
-0.026076916605234143,
0.33286052942276,
-0.5340145826339722,
-0.17279052734375,
-0.01154550164937973,
-0.6620771884918213,
0.18390542268753052,
-0.08265615254640579,
-0.2489682286977768,
0.2429984211921692,
-0.044153645634651184,
-0.986578404903412,
-0.33574509620666504,
-0.5387663841247559,
0.19767941534519196,
0.12540718913078308,
-0.3403128981590271,
-0.4154576361179352,
0.17275673151016235,
0.09407442808151244,
-0.5414086580276489,
0.4393929839134216,
0.1725579798221588,
-0.4998118281364441,
-0.6926208138465881,
0.16552448272705078,
0.6659538149833679,
-0.10949844866991044,
0.986426830291748,
0.01748848147690296,
0.4003709554672241,
-0.5430638194084167,
0.35347291827201843,
0.6887399554252625,
0.08274628221988678,
0.13407137989997864,
-0.591465950012207,
0.3446292281150818,
0.6069018244743347,
0.1935492902994156,
-0.0989871397614479,
0.07008486241102219,
-0.8503749370574951,
-0.09507356584072112,
0.6259510517120361,
0.13934025168418884,
0.06392545253038406,
-0.4112265408039093,
-0.08475656062364578,
0.4974113404750824,
-0.30606114864349365,
1.111435890197754,
-0.018766529858112335,
-0.8422622680664063,
0.4325508773326874,
-0.2832120656967163,
-0.4859798848628998,
-0.41498348116874695,
0.015977520495653152,
0.5292825698852539,
0.4538311660289765,
1.1328668594360352,
0.22632671892642975,
0.7918671369552612,
0.33401933312416077,
0.7306135296821594,
0.3548600673675537,
0.12506209313869476,
0.8573207855224609,
-0.5818327069282532,
-0.6953738927841187,
-1.6171947717666626,
-0.1699674427509308,
0.6318262815475464,
-0.05671752244234085,
-0.28145185112953186,
-0.3976689279079437,
-0.2041076272726059,
-0.5495951175689697,
-0.5152917504310608,
-0.9309796094894408,
0.101932130753994,
0.1367802917957306,
0.1490798443555832,
0.5304336547851563,
-0.5082434415817261,
0.06688683480024338,
0.14657628536224365,
-0.782435953617096,
0.2962816655635834,
0.6965363621711731,
0.8496337532997131,
-0.3042965829372406,
0.04343798756599426,
0.0330701619386673,
-0.5662598013877869,
1.1086925268173218,
0.756072998046875,
-0.204134538769722,
0.2404300570487976,
-0.47848284244537354,
1.3659011125564575,
0.5645433068275452,
-0.15836156904697418,
0.43395575881004333,
0.5944653749465942,
1.0043466091156006,
-0.49446743726730347,
-0.5954391360282898,
0.5341240763664246,
0.020598189905285835,
-0.4036853015422821,
0.4473709762096405,
1.1998231410980225,
-0.9317775368690492,
-0.23321466147899628,
0.2052552700042725,
-0.7423108816146851,
-0.19917210936546328,
-0.1722569614648819,
-0.034072667360305786,
-0.00671181408688426,
0.46396249532699585,
-0.1372445821762085,
0.053376372903585434,
0.7392690777778625,
-0.38447609543800354,
0.07497968524694443,
0.5197252631187439,
1.3746477365493774,
0.9060075879096984,
0.20000585913658145,
-0.4053704142570496,
0.7497360110282898,
-0.34087055921554565,
-1.101803183555603,
0.273650586605072,
-0.5125769376754761,
0.22472351789474487,
0.480757474899292,
-0.19845178723335263,
0.8857700824737549,
0.30752456188201904,
1.1109285354614258,
-0.6768012642860413,
0.524367094039917,
-0.22495046257972717,
-0.4224412739276886,
0.40753406286239624,
-0.23133376240730288,
0.3297771215438843,
0.4905449151992798,
-0.6813114285469055,
-0.7543983459472656,
-0.5599071383476257,
0.14351597428321838,
-0.029278717935085297,
-0.3970443606376648,
-0.303079217672348,
0.24161772429943085,
0.008353390730917454,
-0.0062365154735744,
1.0824860334396362,
-0.3704061508178711,
-1.0337258577346802,
0.04638749733567238,
1.163011074066162,
-0.31737643480300903,
0.013986887410283089,
0.19223114848136905,
-0.2260770797729492,
-0.210910826921463,
-1.0191949605941772,
0.22356095910072327,
0.09353553503751756,
0.18096882104873657,
0.14867214858531952,
0.43408671021461487,
-0.33312076330184937,
0.8173948526382446,
0.6428242921829224,
0.20215003192424777,
-0.6634518504142761,
-0.4132290482521057,
0.29815030097961426,
-1.579406976699829,
-0.0981958732008934,
-0.03941014781594277,
0.1709178239107132,
-0.5481140613555908,
-0.5338194966316223,
-0.3528362512588501,
-0.11561278253793716,
-0.21793591976165771,
-1.1570470333099363,
0.2157980799674988,
0.42083489894866943,
0.9639263153076172,
0.09747201204299928,
0.15671424567699432,
0.4034591615200043,
0.6728067994117737,
-0.5216875672340393,
0.09657668322324751,
-0.2416689097881317,
0.747975766658783,
0.1021689772605896,
0.11652665585279463,
-1.0484966039657593,
0.8489304780960083,
0.7169828414916992,
-0.09012343734502792,
-1.3173753023147583,
0.057890523225069046,
-0.006231260951608419,
-0.1018214002251625,
0.936040461063385,
-0.0502331368625164,
-0.4284322261810303,
-0.38209280371665955,
-0.22668412327766416,
0.0782942995429039,
-0.4881664514541626,
0.9268959760665894,
0.001867273123934865,
0.42261114716529846,
0.8283362984657288,
0.4256294071674347,
-0.7965338826179504,
0.4840078353881836,
-0.19861412048339844,
0.33977967500686646,
-0.4604192078113556,
-0.3107339143753052,
-0.2839638590812683,
-1.5734281539916992,
0.005220232997089624,
0.09239906817674635,
-0.7828494906425476,
-0.1397123783826828,
0.2576255202293396,
0.21372435986995697,
-0.23169949650764465,
0.4016408920288086,
-0.462497353553772,
-0.2186472862958908,
-0.5617868900299072,
-0.3649831712245941,
-1.1585862636566162,
-0.08222806453704834,
0.931126832962036,
0.4327389597892761,
-0.46451422572135925,
-0.5430706143379211,
-0.27434298396110535,
-0.9479129314422609,
0.1845661848783493,
0.3972720205783844,
0.4883299469947815,
1.04031240940094
]
],
"regenerate": true
}
}
}

View File

@ -1,780 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e0",
"desc": "overriden vector",
"_vectors": {
"default": [
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1
]
}
}

View File

@ -152,7 +152,6 @@ impl Settings<Unchecked> {
}
#[derive(Debug, Clone, Deserialize)]
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]

View File

@ -182,7 +182,6 @@ impl Settings<Unchecked> {
}
}
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
#[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]

View File

@ -200,7 +200,6 @@ impl std::ops::Deref for IndexUid {
}
}
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
#[derive(Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
#[cfg_attr(test, serde(rename_all = "camelCase"))]

View File

@ -568,7 +568,7 @@ pub mod tests {
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequences
// but it also works with other sequencies
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}

View File

@ -22,6 +22,7 @@ flate2 = "1.0.28"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = { version = "0.16.0", features = ["serialization"] }
rayon = "1.8.1"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
@ -36,13 +37,11 @@ time = { version = "0.3.31", features = [
"macros",
] }
tracing = "0.1.40"
ureq = "2.9.7"
ureq = "2.9.1"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies]
arroy = "0.4.0"
big_s = "1.0.2"
crossbeam = "0.8.4"
insta = { version = "1.34.0", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }

View File

@ -13,7 +13,7 @@ We can combine the two tasks in a single batch:
1. import documents X and Y
Processing this batch is functionally equivalent to processing the two
tasks individually, but should be much faster since we are only performing
tasks individally, but should be much faster since we are only performing
one indexing operation.
*/
@ -31,9 +31,6 @@ use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
use meilisearch_types::milli::vector::parsed_vectors::{
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
};
use meilisearch_types::milli::{self, Filter};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
@ -529,6 +526,8 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
puffin::profile_function!();
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
@ -637,6 +636,8 @@ impl IndexScheduler {
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
}
puffin::profile_function!(batch.to_string());
match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
@ -784,12 +785,10 @@ impl IndexScheduler {
let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?;
// TODO We can't use the open_auth_store_env function here but we should
let auth = unsafe {
milli::heed::EnvOpenOptions::new()
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.auth_path)
}?;
let auth = milli::heed::EnvOpenOptions::new()
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.auth_path)?;
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot
@ -909,67 +908,14 @@ impl IndexScheduler {
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(&rtxn)?;
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (id, doc) = ret?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
'inject_vectors: {
let embeddings = index.embeddings(&rtxn, id)?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
return Err(milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(&rtxn, std::iter::once(id))
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={id}")
}
},
value: vectors.clone(),
},
)
.into());
};
for (embedder_name, embeddings) in embeddings {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == embedder_name)
.is_some_and(|conf| conf.user_provided.contains(id));
let embeddings = ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
),
regenerate: !user_provided,
};
vectors.insert(
embedder_name,
serde_json::to_value(embeddings).unwrap(),
);
}
}
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}
@ -1228,6 +1174,8 @@ impl IndexScheduler {
index: &'i Index,
operation: IndexOperation,
) -> Result<Vec<Task>> {
puffin::profile_function!();
match operation {
IndexOperation::DocumentClear { mut tasks, .. } => {
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;

View File

@ -68,6 +68,19 @@ impl RoFeatures {
.into())
}
}
/// Checks that exporting Puffin reports is enabled in the runtime features.
///
/// # Errors
///
/// Returns a `FeatureNotEnabledError` (converted into this module's error
/// type) when `self.runtime.export_puffin_reports` is `false`.
pub fn check_puffin(&self) -> Result<()> {
    // Guard clause: bail out early when the feature flag is off.
    if !self.runtime.export_puffin_reports {
        let not_enabled = FeatureNotEnabledError {
            disabled_action: "Outputting Puffin reports to disk",
            feature: "export puffin reports",
            issue_link: "https://github.com/meilisearch/product/discussions/693",
        };
        return Err(not_enabled.into());
    }

    Ok(())
}
}
impl FeatureData {

View File

@ -32,6 +32,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
features: _,
max_number_of_tasks: _,
max_number_of_batched_tasks: _,
puffin_frame: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,

File diff suppressed because it is too large Load Diff

View File

@ -1,15 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: doc
---
{
"doggo": "Intel",
"breed": "beagle",
"_vectors": {
"noise": [
0.1,
0.2,
0.3
]
}
}

View File

@ -1,20 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: task.details
---
{
"embedders": {
"A_fakerest": {
"source": "rest",
"apiKey": "MyXXXX...",
"dimensions": 384,
"url": "http://localhost:7777"
},
"B_small_hf": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo"
}
}
}

View File

@ -1,15 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: doc
---
{
"doggo": "kefir",
"breed": "patou",
"_vectors": {
"noise": [
0.1,
0.2,
0.3
]
}
}

View File

@ -1,23 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: fakerest_config.embedder_options
---
{
"Rest": {
"api_key": "My super secret",
"distribution": null,
"dimensions": 384,
"url": "http://localhost:7777",
"query": null,
"input_field": [
"input"
],
"path_to_embeddings": [
"data"
],
"embedding_object": [
"embedding"
],
"input_type": "text"
}
}

View File

@ -1,11 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: simple_hf_config.embedder_options
---
{
"HuggingFace": {
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"distribution": null
}
}

View File

@ -1,20 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: task.details
---
{
"embedders": {
"A_fakerest": {
"source": "rest",
"apiKey": "MyXXXX...",
"dimensions": 384,
"url": "http://localhost:7777"
},
"B_small_hf": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo"
}
}
}

View File

@ -1,49 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,2,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,48 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [2,]
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,2,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001
----------------------------------------------------------------------

View File

@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,44 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -1,36 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -272,9 +272,9 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
}
for index_uid in index_uids {
if index_uid == swap.0 {
swap.1.clone_into(index_uid);
*index_uid = swap.1.to_owned();
} else if index_uid == swap.1 {
swap.0.clone_into(index_uid);
*index_uid = swap.0.to_owned();
}
}
}

View File

@ -49,7 +49,7 @@ pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env>
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2);
unsafe { options.open(path) }
options.open(path)
}
impl HeedAuthStore {

View File

@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.6.0", default-features = false }
actix-web = { version = "4.5.1", default-features = false }
anyhow = "1.0.79"
convert_case = "0.6.0"
csv = "1.3.0"
@ -30,12 +30,7 @@ serde_json = "1.0.111"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] }

View File

@ -26,7 +26,7 @@ pub type DeserrQueryParamError<C = BadRequest> = DeserrError<DeserrQueryParam, C
/// A request deserialization error.
///
/// The first generic parameter is a marker type describing the format of the request: either json (e.g. [`DeserrJson`] or [`DeserrQueryParam`]).
/// The first generic paramater is a marker type describing the format of the request: either json (e.g. [`DeserrJson`] or [`DeserrQueryParam`]).
/// The second generic parameter is the default error code for the deserialization error, in case it is not given.
pub struct DeserrError<Format, C: Default + ErrorCode> {
pub msg: String,
@ -189,6 +189,3 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarId);

View File

@ -222,7 +222,6 @@ InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ;
InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
@ -240,27 +239,18 @@ InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSimilarRetrieveVectors , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
InvalidSimilarRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
InvalidSearchRetrieveVectors , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
InvalidSimilarFilter , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ;
InvalidSimilarLimit , InvalidRequest , BAD_REQUEST ;
InvalidSearchLimit , InvalidRequest , BAD_REQUEST ;
InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSimilarOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
@ -269,18 +259,15 @@ InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ;
InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
@ -335,8 +322,7 @@ UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
// Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
NotFoundSimilarId , InvalidRequest , BAD_REQUEST
VectorEmbeddingError , InvalidRequest , BAD_REQUEST
}
impl ErrorCode for JoinError {
@ -385,7 +371,6 @@ impl ErrorCode for milli::Error {
Code::IndexPrimaryKeyMultipleCandidatesFound
}
UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists,
UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
@ -398,8 +383,8 @@ impl ErrorCode for milli::Error {
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
UserError::InvalidVectorsMapType { .. }
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError(_) => Code::InvalidSearchSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
@ -438,6 +423,7 @@ impl ErrorCode for HeedError {
HeedError::Mdb(_)
| HeedError::Encoding(_)
| HeedError::Decoding(_)
| HeedError::InvalidDatabaseTyping
| HeedError::DatabaseClosing
| HeedError::BadOpenOptions { .. } => Code::Internal,
}
@ -502,32 +488,6 @@ impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
}
}
/// User-facing explanation emitted when the `id` parameter of a similar request is rejected.
impl fmt::Display for deserr_codes::InvalidSimilarId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The message has no placeholders, so it is written out as a plain string.
        f.write_str(
            "the value of `id` is invalid. \
             A document identifier can be of type integer or string, \
             only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
        )
    }
}
/// User-facing explanation emitted when `rankingScoreThreshold` is rejected.
impl fmt::Display for deserr_codes::InvalidSearchRankingScoreThreshold {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The message has no placeholders, so it is written out as a plain string.
        f.write_str(
            "the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
        )
    }
}
/// The similar route reports threshold errors by reusing the formatting of the
/// search threshold error code.
impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate instead of duplicating the message.
        let search_threshold_code = deserr_codes::InvalidSearchRankingScoreThreshold;
        search_threshold_code.fmt(f)
    }
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {

View File

@ -6,6 +6,7 @@ pub struct RuntimeTogglableFeatures {
pub vector_store: bool,
pub metrics: bool,
pub logs_route: bool,
pub export_puffin_reports: bool,
}
#[derive(Default, Debug, Clone, Copy)]

View File

@ -8,7 +8,6 @@ use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
use milli::index::IndexEmbeddingConfig;
use milli::proximity::ProximityPrecision;
use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
@ -673,7 +672,7 @@ pub fn settings(
let embedders: BTreeMap<_, _> = index
.embedding_configs(rtxn)?
.into_iter()
.map(|IndexEmbeddingConfig { name, config, .. }| (name, Setting::Set(config.into())))
.map(|(name, config)| (name, Setting::Set(config.into())))
.collect();
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };

View File

@ -14,20 +14,20 @@ default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.7.0", default-features = false, features = [
actix-http = { version = "3.6.0", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_21",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.6.0", default-features = false, features = [
actix-web = { version = "4.5.1", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_21",
] }
actix-web-static-files = { version = "4.0.1", optional = true }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.77"
@ -67,6 +67,7 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.13"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.8.0"
regex = "1.10.2"
@ -74,7 +75,7 @@ reqwest = { version = "0.11.23", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = "0.21.12"
rustls = "0.21.6"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] }
@ -105,20 +106,19 @@ url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.10"
tracing-actix-web = "0.7.9"
build-info = { version = "1.7.0", path = "../build-info" }
[dev-dependencies]
actix-rt = "2.9.0"
assert-json-diff = "2.0.2"
brotli = "6.0.0"
brotli = "3.4.0"
insta = "1.34.0"
manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
wiremock = "0.6.0"
yaup = "0.2.1"
[build-dependencies]
@ -159,5 +159,5 @@ vietnamese = ["meilisearch-types/vietnamese"]
swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"
sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff"

View File

@ -25,18 +25,6 @@ impl SearchAggregator {
pub fn succeed(&mut self, _: &dyn Any) {}
}
/// Stateless stand-in for the similar-request aggregator: every method is a no-op.
#[derive(Default)]
pub struct SimilarAggregator;

#[allow(dead_code)]
impl SimilarAggregator {
    /// Builds the placeholder aggregator; both arguments are accepted and ignored.
    pub fn from_query(_query: &dyn Any, _request: &dyn Any) -> Self {
        SimilarAggregator
    }

    /// Accepts a result and does nothing with it.
    pub fn succeed(&mut self, _result: &dyn Any) {}
}
#[derive(Default)]
pub struct MultiSearchAggregator;
@ -78,8 +66,6 @@ impl Analytics for MockAnalytics {
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn get_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(

View File

@ -22,8 +22,6 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(not(feature = "analytics"))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type SimilarAggregator = mock_analytics::SimilarAggregator;
#[cfg(not(feature = "analytics"))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
@ -34,8 +32,6 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(feature = "analytics")]
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(feature = "analytics")]
pub type SimilarAggregator = segment_analytics::SimilarAggregator;
#[cfg(feature = "analytics")]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(feature = "analytics")]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
@ -74,8 +70,8 @@ pub enum DocumentDeletionKind {
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
PerDocumentId,
Normal { with_filter: bool, limit: usize, offset: usize },
}
pub trait Analytics: Sync + Send {
@ -90,12 +86,6 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a post search
fn post_search(&self, aggregate: SearchAggregator);
/// This method should be called to aggregate a get similar request
fn get_similar(&self, aggregate: SimilarAggregator);
/// This method should be called to aggregate a post similar request
fn post_similar(&self, aggregate: SimilarAggregator);
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);

View File

@ -36,9 +36,8 @@ use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEMANTIC_RATIO,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO,
};
use crate::Opt;
@ -74,8 +73,6 @@ pub enum AnalyticsMsg {
BatchMessage(Track),
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregateGetSimilar(SimilarAggregator),
AggregatePostSimilar(SimilarAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregatePostFacetSearch(FacetSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
@ -152,8 +149,6 @@ impl SegmentAnalytics {
update_documents_aggregator: DocumentsAggregator::default(),
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
get_similar_aggregator: SimilarAggregator::default(),
post_similar_aggregator: SimilarAggregator::default(),
});
tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));
@ -189,14 +184,6 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
/// Wraps the aggregate in an `AggregateGetSimilar` message and hands it to `self.sender`.
/// A failed `try_send` is deliberately ignored.
fn get_similar(&self, aggregate: SimilarAggregator) {
    let message = AnalyticsMsg::AggregateGetSimilar(aggregate);
    let _ = self.sender.try_send(message);
}
/// Wraps the aggregate in an `AggregatePostSimilar` message and hands it to `self.sender`.
/// A failed `try_send` is deliberately ignored.
fn post_similar(&self, aggregate: SimilarAggregator) {
    let message = AnalyticsMsg::AggregatePostSimilar(aggregate);
    let _ = self.sender.try_send(message);
}
/// Wraps the aggregate in an `AggregatePostFacetSearch` message and hands it to `self.sender`.
/// A failed `try_send` is deliberately ignored.
fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
    let message = AnalyticsMsg::AggregatePostFacetSearch(aggregate);
    let _ = self.sender.try_send(message);
}
@ -392,8 +379,6 @@ pub struct Segment {
update_documents_aggregator: DocumentsAggregator,
get_fetch_documents_aggregator: DocumentsFetchAggregator,
post_fetch_documents_aggregator: DocumentsFetchAggregator,
get_similar_aggregator: SimilarAggregator,
post_similar_aggregator: SimilarAggregator,
}
impl Segment {
@ -456,8 +441,6 @@ impl Segment {
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSimilar(agreg)) => self.post_similar_aggregator.aggregate(agreg),
None => (),
}
}
@ -511,8 +494,6 @@ impl Segment {
update_documents_aggregator,
get_fetch_documents_aggregator,
post_fetch_documents_aggregator,
get_similar_aggregator,
post_similar_aggregator,
} = self;
if let Some(get_search) =
@ -560,18 +541,6 @@ impl Segment {
{
let _ = self.batcher.push(post_fetch_documents).await;
}
if let Some(get_similar_documents) =
take(get_similar_aggregator).into_event(user, "Similar GET")
{
let _ = self.batcher.push(get_similar_documents).await;
}
if let Some(post_similar_documents) =
take(post_similar_aggregator).into_event(user, "Similar POST")
{
let _ = self.batcher.push(post_similar_documents).await;
}
let _ = self.batcher.flush().await;
}
}
@ -597,9 +566,6 @@ pub struct SearchAggregator {
// every time a request has a filter, this field must be incremented by one
sort_total_number_of_criteria: usize,
// distinct
distinct: bool,
// filter
filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool,
@ -625,7 +591,6 @@ pub struct SearchAggregator {
// Whether a non-default embedder was specified
embedder: bool,
hybrid: bool,
retrieve_vectors: bool,
// every time a search is done, we increment the counter linked to the used settings
matching_strategy: HashMap<String, usize>,
@ -652,7 +617,6 @@ pub struct SearchAggregator {
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
}
impl SearchAggregator {
@ -666,7 +630,6 @@ impl SearchAggregator {
page,
hits_per_page,
attributes_to_retrieve: _,
retrieve_vectors,
attributes_to_crop: _,
crop_length,
attributes_to_highlight: _,
@ -675,7 +638,6 @@ impl SearchAggregator {
show_ranking_score_details,
filter,
sort,
distinct,
facets: _,
highlight_pre_tag,
highlight_post_tag,
@ -683,7 +645,6 @@ impl SearchAggregator {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@ -698,8 +659,6 @@ impl SearchAggregator {
ret.sort_sum_of_criteria_terms = sort.len();
}
ret.distinct = distinct.is_some();
if let Some(ref filter) = filter {
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1;
@ -736,7 +695,6 @@ impl SearchAggregator {
if let Some(ref vector) = vector {
ret.max_vector_size = vector.len();
}
ret.retrieve_vectors |= retrieve_vectors;
if query.is_finite_pagination() {
let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
@ -759,7 +717,6 @@ impl SearchAggregator {
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
@ -804,7 +761,6 @@ impl SearchAggregator {
sort_with_geo_point,
sort_sum_of_criteria_terms,
sort_total_number_of_criteria,
distinct,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
@ -813,7 +769,6 @@ impl SearchAggregator {
attributes_to_search_on_total_number_of_uses,
max_terms_number,
max_vector_size,
retrieve_vectors,
matching_strategy,
max_limit,
max_offset,
@ -835,7 +790,6 @@ impl SearchAggregator {
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
} = other;
if self.timestamp.is_none() {
@ -862,9 +816,6 @@ impl SearchAggregator {
self.sort_total_number_of_criteria =
self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
// distinct
self.distinct |= distinct;
// filter
self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
@ -887,7 +838,6 @@ impl SearchAggregator {
// vector
self.max_vector_size = self.max_vector_size.max(max_vector_size);
self.retrieve_vectors |= retrieve_vectors;
self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid;
self.embedder |= embedder;
@ -923,7 +873,6 @@ impl SearchAggregator {
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@ -936,7 +885,6 @@ impl SearchAggregator {
sort_with_geo_point,
sort_sum_of_criteria_terms,
sort_total_number_of_criteria,
distinct,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
@ -945,7 +893,6 @@ impl SearchAggregator {
attributes_to_search_on_total_number_of_uses,
max_terms_number,
max_vector_size,
retrieve_vectors,
matching_strategy,
max_limit,
max_offset,
@ -967,7 +914,6 @@ impl SearchAggregator {
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
} = self;
if total_received == 0 {
@ -994,7 +940,6 @@ impl SearchAggregator {
"with_geoPoint": sort_with_geo_point,
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
},
"distinct": distinct,
"filter": {
"with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box,
@ -1009,7 +954,6 @@ impl SearchAggregator {
},
"vector": {
"max_vector_size": max_vector_size,
"retrieve_vectors": retrieve_vectors,
},
"hybrid": {
"enabled": hybrid,
@ -1040,7 +984,6 @@ impl SearchAggregator {
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
});
@ -1098,7 +1041,6 @@ impl MultiSearchAggregator {
page: _,
hits_per_page: _,
attributes_to_retrieve: _,
retrieve_vectors: _,
attributes_to_crop: _,
crop_length: _,
attributes_to_highlight: _,
@ -1107,7 +1049,6 @@ impl MultiSearchAggregator {
show_matches_position: _,
filter: _,
sort: _,
distinct: _,
facets: _,
highlight_pre_tag: _,
highlight_post_tag: _,
@ -1115,7 +1056,6 @@ impl MultiSearchAggregator {
matching_strategy: _,
attributes_to_search_on: _,
hybrid: _,
ranking_score_threshold: _,
} = query;
index_uid.as_str()
@ -1263,7 +1203,6 @@ impl FacetSearchAggregator {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@ -1278,8 +1217,7 @@ impl FacetSearchAggregator {
|| filter.is_some()
|| *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some()
|| hybrid.is_some()
|| ranking_score_threshold.is_some();
|| hybrid.is_some();
ret
}
@ -1555,9 +1493,6 @@ pub struct DocumentsFetchAggregator {
// if a filter was used
per_filter: bool,
#[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool,
// pagination
#[serde(rename = "pagination.max_limit")]
max_limit: usize,
@ -1567,21 +1502,18 @@ pub struct DocumentsFetchAggregator {
impl DocumentsFetchAggregator {
pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self {
let (limit, offset, retrieve_vectors) = match query {
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
(*limit, *offset, *retrieve_vectors)
}
let (limit, offset) = match query {
DocumentFetchKind::PerDocumentId => (1, 0),
DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset),
};
Self {
timestamp: Some(OffsetDateTime::now_utc()),
user_agents: extract_user_agents(request).into_iter().collect(),
total_received: 1,
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit,
max_offset: offset,
retrieve_vectors,
}
}
@ -1595,7 +1527,6 @@ impl DocumentsFetchAggregator {
per_filter,
max_limit,
max_offset,
retrieve_vectors,
} = other;
if self.timestamp.is_none() {
@ -1611,8 +1542,6 @@ impl DocumentsFetchAggregator {
self.max_limit = self.max_limit.max(max_limit);
self.max_offset = self.max_offset.max(max_offset);
self.retrieve_vectors |= retrieve_vectors;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@ -1629,251 +1558,3 @@ impl DocumentsFetchAggregator {
})
}
}
/// Running statistics about similar-documents requests.
///
/// One value is created per request by `from_query`, merged into a running total by
/// `aggregate`, and turned into a single analytics event by `into_event`.
#[derive(Default)]
pub struct SimilarAggregator {
    /// Set to the current time by `from_query`; `aggregate` keeps the first one it sees.
    timestamp: Option<OffsetDateTime>,

    // --- context ---
    /// Distinct user agents extracted from the aggregated requests.
    user_agents: HashSet<String>,

    // --- requests ---
    /// Number of requests received.
    total_received: usize,
    /// Number of requests reported through `succeed`.
    total_succeeded: usize,
    /// Processing time of each succeeded request, as pushed by `succeed`.
    time_spent: BinaryHeap<usize>,

    // --- filter ---
    /// Whether any filter contained `_geoRadius(`.
    filter_with_geo_radius: bool,
    /// Whether any filter contained `_geoBoundingBox(`.
    filter_with_geo_bounding_box: bool,
    /// Incremented, for every request with a filter, by the number of terms the filter contains.
    filter_sum_of_criteria_terms: usize,
    /// Incremented by one for every request with a filter.
    filter_total_number_of_criteria: usize,
    /// How many times each filter syntax ("string", "array", "mixed", "none") was used.
    used_syntax: HashMap<String, usize>,

    // --- vectors ---
    /// Whether a non-default embedder was specified.
    embedder: bool,
    /// Whether any request asked for vectors to be retrieved.
    retrieve_vectors: bool,

    // --- pagination ---
    /// Largest `limit` seen.
    max_limit: usize,
    /// Largest `offset` seen.
    max_offset: usize,

    // --- formatting ---
    /// Largest value merged by `aggregate`; reported under `formatting`.
    max_attributes_to_retrieve: usize,

    // --- scoring ---
    /// Whether any request asked for the ranking score.
    show_ranking_score: bool,
    /// Whether any request asked for the ranking score details.
    show_ranking_score_details: bool,
    /// Whether any request set a ranking score threshold.
    ranking_score_threshold: bool,
}
impl SimilarAggregator {
/// Builds the aggregate describing a single similar request.
///
/// The result counts exactly one received request, stamped with the current time and
/// the user agents extracted from `request`.
#[allow(clippy::field_reassign_with_default)]
pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self {
    // Exhaustive destructuring: adding a field to `SimilarQuery` forces a decision here.
    let SimilarQuery {
        id: _,
        embedder,
        offset,
        limit,
        // NOTE(review): ignored here, so `max_attributes_to_retrieve` keeps its default value.
        attributes_to_retrieve: _,
        retrieve_vectors,
        show_ranking_score,
        show_ranking_score_details,
        filter,
        ranking_score_threshold,
    } = query;

    let mut aggregator = Self::default();
    aggregator.timestamp = Some(OffsetDateTime::now_utc());
    aggregator.total_received = 1;
    aggregator.user_agents = extract_user_agents(request).into_iter().collect();

    if let Some(filter) = filter {
        // Compiled once, on first use.
        static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());

        aggregator.filter_total_number_of_criteria = 1;

        // Classify how the filter was written.
        let syntax = match filter {
            Value::String(_) => "string",
            Value::Array(values) => {
                let contains_operator =
                    values.iter().any(|value| RE.is_match(&value.to_string()));
                if contains_operator {
                    "mixed"
                } else {
                    "array"
                }
            }
            _ => "none",
        };
        // A single request contributes one use of its syntax.
        aggregator.used_syntax.insert(syntax.to_string(), 1);

        let stringified_filters = filter.to_string();
        aggregator.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
        aggregator.filter_with_geo_bounding_box =
            stringified_filters.contains("_geoBoundingBox(");
        aggregator.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
    }

    // pagination
    aggregator.max_limit = *limit;
    aggregator.max_offset = *offset;

    // scoring
    aggregator.show_ranking_score = *show_ranking_score;
    aggregator.show_ranking_score_details = *show_ranking_score_details;
    aggregator.ranking_score_threshold = ranking_score_threshold.is_some();

    // vectors
    aggregator.embedder = embedder.is_some();
    aggregator.retrieve_vectors = *retrieve_vectors;

    aggregator
}
/// Records a successful request: stores its processing time and bumps the success counter.
pub fn succeed(&mut self, result: &SimilarResult) {
    // Exhaustive destructuring: only the processing time is of interest.
    let SimilarResult { processing_time_ms, id: _, hits: _, hits_info: _ } = result;

    let elapsed_ms = *processing_time_ms as usize;
    self.time_spent.push(elapsed_ms);
    self.total_succeeded = self.total_succeeded.saturating_add(1);
}
/// Merges `other` into `self`.
///
/// Counters are added with saturation, flags are OR-ed, maxima are kept, and the
/// timestamp of `self` is only filled in when it was still unset.
pub fn aggregate(&mut self, mut other: Self) {
    let Self {
        timestamp,
        user_agents,
        total_received,
        total_succeeded,
        ref mut time_spent,
        filter_with_geo_radius,
        filter_with_geo_bounding_box,
        filter_sum_of_criteria_terms,
        filter_total_number_of_criteria,
        used_syntax,
        max_limit,
        max_offset,
        max_attributes_to_retrieve,
        show_ranking_score,
        show_ranking_score_details,
        embedder,
        ranking_score_threshold,
        retrieve_vectors,
    } = other;

    // Keep the earliest known timestamp: only adopt `other`'s when we have none.
    if self.timestamp.is_none() {
        self.timestamp = timestamp;
    }

    // context
    self.user_agents.extend(user_agents);

    // requests
    self.total_received = self.total_received.saturating_add(total_received);
    self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
    self.time_spent.append(time_spent);

    // filter
    self.filter_with_geo_radius |= filter_with_geo_radius;
    self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
    self.filter_sum_of_criteria_terms =
        self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
    self.filter_total_number_of_criteria =
        self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
    for (syntax, uses) in used_syntax {
        let total_uses = self.used_syntax.entry(syntax).or_default();
        *total_uses = total_uses.saturating_add(uses);
    }

    // vectors
    self.embedder |= embedder;
    self.retrieve_vectors |= retrieve_vectors;

    // pagination
    self.max_limit = self.max_limit.max(max_limit);
    self.max_offset = self.max_offset.max(max_offset);

    // formatting
    self.max_attributes_to_retrieve =
        self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);

    // scoring
    self.show_ranking_score |= show_ranking_score;
    self.show_ranking_score_details |= show_ranking_score_details;
    self.ranking_score_threshold |= ranking_score_threshold;
}
/// Converts the aggregated statistics into a `Track` event named `event_name` for `user`.
///
/// Returns `None` when no request was received, so an empty aggregator produces no event.
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
    let Self {
        timestamp,
        user_agents,
        total_received,
        total_succeeded,
        time_spent,
        filter_with_geo_radius,
        filter_with_geo_bounding_box,
        filter_sum_of_criteria_terms,
        filter_total_number_of_criteria,
        used_syntax,
        max_limit,
        max_offset,
        max_attributes_to_retrieve,
        show_ranking_score,
        show_ranking_score_details,
        embedder,
        ranking_score_threshold,
        retrieve_vectors,
    } = self;

    if total_received == 0 {
        return None;
    }

    // Processing times in ascending order.
    let sorted_times = time_spent.into_sorted_vec();
    // Position of the 99th percentile within the sorted times.
    let percentile_99th = sorted_times.len() * 99 / 100;
    // `get` yields `None` when no request succeeded, i.e. when there are no times at all.
    let response_time_99th = sorted_times.get(percentile_99th).map(|t| format!("{:.2}", t));

    // Saturating so that an inconsistent counter can never cause a panic.
    let total_failed = total_received.saturating_sub(total_succeeded);

    let avg_criteria_number = format!(
        "{:.2}",
        filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64
    );
    let most_used_syntax = used_syntax
        .iter()
        .max_by_key(|(_, uses)| *uses)
        .map_or_else(|| json!(null), |(syntax, _)| json!(syntax));

    let properties = json!({
        "user-agent": user_agents,
        "requests": {
            "99th_response_time": response_time_99th,
            "total_succeeded": total_succeeded,
            "total_failed": total_failed,
            "total_received": total_received,
        },
        "filter": {
            "with_geoRadius": filter_with_geo_radius,
            "with_geoBoundingBox": filter_with_geo_bounding_box,
            "avg_criteria_number": avg_criteria_number,
            "most_used_syntax": most_used_syntax,
        },
        "vector": {
            "retrieve_vectors": retrieve_vectors,
        },
        "hybrid": {
            "embedder": embedder,
        },
        "pagination": {
            "max_limit": max_limit,
            "max_offset": max_offset,
        },
        "formatting": {
            "max_attributes_to_retrieve": max_attributes_to_retrieve,
        },
        "scoring": {
            "show_ranking_score": show_ranking_score,
            "show_ranking_score_details": show_ranking_score_details,
            "ranking_score_threshold": ranking_score_threshold,
        },
    });

    Some(Track {
        timestamp,
        user: user.clone(),
        event: event_name.to_string(),
        properties,
        ..Default::default()
    })
}
}

View File

@ -59,12 +59,10 @@ where
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_pattern = req.match_pattern();
let metric_path = request_pattern.as_ref().map_or(request_path, String::as_str);
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, metric_path])
.with_label_values(&[&request_method, request_path])
.start_timer(),
);
}

View File

@ -47,6 +47,8 @@ pub struct RuntimeTogglableFeatures {
pub metrics: Option<bool>,
#[deserr(default)]
pub logs_route: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
}
async fn patch_features(
@ -66,13 +68,21 @@ async fn patch_features(
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
export_puffin_reports: new_features
.0
.export_puffin_reports
.unwrap_or(old_features.export_puffin_reports),
};
// Explicitly destructure for analytics rather than using the `Serialize` implementation, because
// it renames fields to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, logs_route } =
new_features;
let meilisearch_types::features::RuntimeTogglableFeatures {
vector_store,
metrics,
logs_route,
export_puffin_reports,
} = new_features;
analytics.publish(
"Experimental features Updated".to_string(),
@ -80,6 +90,7 @@ async fn patch_features(
"vector_store": vector_store,
"metrics": metrics,
"logs_route": logs_route,
"export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);

View File

@ -16,7 +16,6 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
@ -40,7 +39,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
use crate::search::{parse_filter, RetrieveVectors};
use crate::search::parse_filter;
use crate::Opt;
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
@ -95,8 +94,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
pub struct GetDocument {
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
fields: OptionStarOrList<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
retrieve_vectors: Param<bool>,
}
pub async fn get_document(
@ -110,20 +107,13 @@ pub async fn get_document(
debug!(parameters = ?params, "Get document");
let index_uid = IndexUid::try_from(index_uid)?;
let GetDocument { fields, retrieve_vectors: param_retrieve_vectors } = params.into_inner();
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
let GetDocument { fields } = params.into_inner();
let attributes_to_retrieve = fields.merge_star_and_none();
let features = index_scheduler.features();
let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?;
analytics.get_fetch_documents(
&DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 },
&req,
);
let index = index_scheduler.index(&index_uid)?;
let document =
retrieve_document(&index, &document_id, attributes_to_retrieve, retrieve_vectors)?;
let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
debug!(returns = ?document, "Get document");
Ok(HttpResponse::Ok().json(document))
}
@ -163,8 +153,6 @@ pub struct BrowseQueryGet {
limit: Param<usize>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
fields: OptionStarOrList<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
retrieve_vectors: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>,
}
@ -178,8 +166,6 @@ pub struct BrowseQuery {
limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
fields: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)]
retrieve_vectors: bool,
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>,
}
@ -199,7 +185,6 @@ pub async fn documents_by_query_post(
with_filter: body.filter.is_some(),
limit: body.limit,
offset: body.offset,
retrieve_vectors: body.retrieve_vectors,
},
&req,
);
@ -216,7 +201,7 @@ pub async fn get_documents(
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner();
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
let filter = match filter {
Some(f) => match serde_json::from_str(&f) {
@ -230,7 +215,6 @@ pub async fn get_documents(
offset: offset.0,
limit: limit.0,
fields: fields.merge_star_and_none(),
retrieve_vectors: retrieve_vectors.0,
filter,
};
@ -239,7 +223,6 @@ pub async fn get_documents(
with_filter: query.filter.is_some(),
limit: query.limit,
offset: query.offset,
retrieve_vectors: query.retrieve_vectors,
},
&req,
);
@ -253,14 +236,10 @@ fn documents_by_query(
query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query;
let features = index_scheduler.features();
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?;
let BrowseQuery { offset, limit, fields, filter } = query;
let index = index_scheduler.index(&index_uid)?;
let (total, documents) =
retrieve_documents(&index, offset, limit, filter, fields, retrieve_vectors)?;
let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
@ -600,44 +579,13 @@ fn some_documents<'a, 't: 'a>(
index: &'a Index,
rtxn: &'t RoTxn,
doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
retrieve_vectors: RetrieveVectors,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(rtxn)?;
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
match retrieve_vectors {
RetrieveVectors::Ignore => {}
RetrieveVectors::Hide => {
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
);
}
document.insert("_vectors".into(), vectors.into());
}
}
Ok(document)
ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
})
}))
}
@ -648,7 +596,6 @@ fn retrieve_documents<S: AsRef<str>>(
limit: usize,
filter: Option<Value>,
attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let filter = &filter;
@ -673,57 +620,53 @@ fn retrieve_documents<S: AsRef<str>>(
let (it, number_of_documents) = {
let number_of_documents = candidates.len();
(
some_documents(
index,
&rtxn,
candidates.into_iter().skip(offset).take(limit),
retrieve_vectors,
)?,
some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?,
number_of_documents,
)
};
let documents: Vec<_> = it
let documents: Result<Vec<_>, ResponseError> = it
.map(|document| {
Ok(match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document?,
attributes_to_retrieve.iter().map(|s| s.as_ref()).chain(
(retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors"),
),
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document?,
})
})
.collect::<Result<_, ResponseError>>()?;
.collect();
Ok((number_of_documents, documents))
Ok((number_of_documents, documents?))
}
fn retrieve_document<S: AsRef<str>>(
index: &Index,
doc_id: &str,
attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors,
) -> Result<Document, ResponseError> {
let txn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&txn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let internal_id = index
.external_documents_ids()
.get(&txn, doc_id)?
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = some_documents(index, &txn, Some(internal_id), retrieve_vectors)?
let document = index
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next()
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))??;
.map(|(_, d)| d)
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
let document = match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document,
attributes_to_retrieve
.iter()
.map(|s| s.as_ref())
.chain((retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors")),
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document,
};

View File

@ -14,8 +14,8 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::search_queue::SearchQueue;
@ -46,8 +46,6 @@ pub struct FacetSearchQuery {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
pub async fn search(
@ -71,7 +69,7 @@ pub async fn search(
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut search_query.filter, search_rules);
add_search_rules(&mut search_query, search_rules);
}
let index = index_scheduler.index(&index_uid)?;
@ -105,7 +103,6 @@ impl From<FacetSearchQuery> for SearchQuery {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = value;
SearchQuery {
@ -115,7 +112,6 @@ impl From<FacetSearchQuery> for SearchQuery {
page: None,
hits_per_page: None,
attributes_to_retrieve: None,
retrieve_vectors: false,
attributes_to_crop: None,
crop_length: DEFAULT_CROP_LENGTH(),
attributes_to_highlight: None,
@ -124,7 +120,6 @@ impl From<FacetSearchQuery> for SearchQuery {
show_ranking_score_details: false,
filter,
sort: None,
distinct: None,
facets: None,
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
@ -133,7 +128,6 @@ impl From<FacetSearchQuery> for SearchQuery {
vector,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
}
}
}

View File

@ -29,7 +29,6 @@ pub mod documents;
pub mod facet_search;
pub mod search;
pub mod settings;
pub mod similar;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
@ -49,7 +48,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
.service(web::scope("/similar").configure(similar::configure))
.service(web::scope("/settings").configure(settings::configure)),
);
}

View File

@ -19,10 +19,9 @@ use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery,
SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;
@ -51,8 +50,6 @@ pub struct SearchQueryGet {
hits_per_page: Option<Param<usize>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
attributes_to_retrieve: Option<CS<String>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRetrieveVectors>)]
retrieve_vectors: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
attributes_to_crop: Option<CS<String>>,
#[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
@ -63,8 +60,6 @@ pub struct SearchQueryGet {
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
sort: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchDistinct>)]
distinct: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
show_matches_position: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
@ -87,21 +82,6 @@ pub struct SearchQueryGet {
pub hybrid_embedder: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
}
/// Query-string form of a [`RankingScoreThreshold`], used by the `GET` search parameters.
///
/// The raw parameter arrives as a `String` and is converted through the
/// `TryFrom<String>` implementation below.
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThreshold);

impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
    type Error = InvalidSearchRankingScoreThreshold;

    /// Parses `s` as an `f64`, then validates it with `RankingScoreThreshold::try_from`.
    ///
    /// # Errors
    ///
    /// Returns `InvalidSearchRankingScoreThreshold` when `s` is not a float or when the
    /// parsed value is rejected by `RankingScoreThreshold`.
    fn try_from(s: String) -> Result<Self, Self::Error> {
        let parsed = s.parse::<f64>().map_err(|_| InvalidSearchRankingScoreThreshold)?;
        RankingScoreThreshold::try_from(parsed).map(RankingScoreThresholdGet)
    }
}
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
@ -157,13 +137,11 @@ impl From<SearchQueryGet> for SearchQuery {
page: other.page.as_deref().copied(),
hits_per_page: other.hits_per_page.as_deref().copied(),
attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
retrieve_vectors: other.retrieve_vectors.0,
attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
crop_length: other.crop_length.0,
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
distinct: other.distinct,
show_matches_position: other.show_matches_position.0,
show_ranking_score: other.show_ranking_score.0,
show_ranking_score_details: other.show_ranking_score_details.0,
@ -174,7 +152,6 @@ impl From<SearchQueryGet> for SearchQuery {
matching_strategy: other.matching_strategy,
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
}
}
}
@ -219,7 +196,7 @@ pub async fn search_with_url_query(
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules);
add_search_rules(&mut query, search_rules);
}
let mut aggregate = SearchAggregator::from_query(&query, &req);
@ -228,12 +205,10 @@ pub async fn search_with_url_query(
let features = index_scheduler.features();
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
let _permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector)
})
.await?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
@ -260,7 +235,7 @@ pub async fn search_with_post(
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules);
add_search_rules(&mut query, search_rules);
}
let mut aggregate = SearchAggregator::from_query(&query, &req);
@ -270,13 +245,10 @@ pub async fn search_with_post(
let features = index_scheduler.features();
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
let _permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vectors)
})
.await?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
if search_result.degraded {
@ -298,10 +270,11 @@ pub fn search_kind(
features: RoFeatures,
) -> Result<SearchKind, ResponseError> {
if query.vector.is_some() {
features.check_vector("Passing `vector` as a parameter")?;
features.check_vector("Passing `vector` as a query parameter")?;
}
if query.hybrid.is_some() {
features.check_vector("Passing `hybrid` as a parameter")?;
features.check_vector("Passing `hybrid` as a query parameter")?;
}
// regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing

View File

@ -1,192 +0,0 @@
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{ErrorCode as _, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
use super::ActionPolicy;
use crate::analytics::{Analytics, SimilarAggregator};
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
/// Registers the similar-documents handlers on `cfg`.
///
/// A single resource at the scope root (`""`) answers `GET` with [`similar_get`] and
/// `POST` with [`similar_post`]; both handlers are wrapped in `SeqHandler`.
pub fn configure(cfg: &mut web::ServiceConfig) {
    let on_get = web::get().to(SeqHandler(similar_get));
    let on_post = web::post().to(SeqHandler(similar_post));
    cfg.service(web::resource("").route(on_get).route(on_post));
}
pub async fn similar_get(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<SimilarQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.0.try_into()?;
let mut aggregate = SimilarAggregator::from_query(&query, &req);
debug!(parameters = ?query, "Similar get");
let similar = similar(index_scheduler, index_uid, query).await;
if let Ok(similar) = &similar {
aggregate.succeed(similar);
}
analytics.get_similar(aggregate);
let similar = similar?;
debug!(returns = ?similar, "Similar get");
Ok(HttpResponse::Ok().json(similar))
}
/// Handles `POST` requests on the similar-documents route.
///
/// The JSON body is already a [`SimilarQuery`]; the lookup is delegated to [`similar`]
/// and the analytics aggregate is published whether the lookup succeeded or not.
///
/// # Errors
///
/// Fails when the index uid is invalid or when [`similar`] itself fails.
pub async fn similar_post(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebJson<SimilarQuery, DeserrJsonError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
    let query = params.into_inner();
    debug!(parameters = ?query, "Similar post");

    let mut aggregate = SimilarAggregator::from_query(&query, &req);

    match similar(index_scheduler, index_uid, query).await {
        Ok(found) => {
            // only successful lookups are folded into the aggregate
            aggregate.succeed(&found);
            analytics.post_similar(aggregate);
            debug!(returns = ?found, "Similar post");
            Ok(HttpResponse::Ok().json(found))
        }
        Err(error) => {
            // the aggregate is still published when the lookup failed
            analytics.post_similar(aggregate);
            Err(error)
        }
    }
}
/// Shared implementation behind [`similar_get`] and [`similar_post`].
///
/// Steps, in order: the `check_vector` feature gate, construction of the
/// `RetrieveVectors` mode, merging of the tenant-token search rules into the query
/// filter, resolution of the index and embedder, and finally `perform_similar` on a
/// blocking task.
///
/// # Errors
///
/// Propagates the failure of any of the steps above, including a failed join of the
/// blocking task.
async fn similar(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: IndexUid,
    mut query: SimilarQuery,
) -> Result<SimilarResult, ResponseError> {
    let features = index_scheduler.features();
    features.check_vector("Using the similar API")?;
    let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;

    // Tenant token search_rules.
    let tenant_rules = index_scheduler.filters().get_index_search_rules(&index_uid);
    if let Some(search_rules) = tenant_rules {
        add_search_rules(&mut query.filter, search_rules);
    }

    let index = index_scheduler.index(&index_uid)?;
    let requested_embedder = query.embedder.as_deref();
    let (embedder_name, embedder) =
        SearchKind::embedder(&index_scheduler, &index, requested_embedder, None)?;

    // `perform_similar` is synchronous, so it runs on the blocking pool; the first `?`
    // handles the join error, the inner result is returned as-is.
    let blocking_task = tokio::task::spawn_blocking(move || {
        perform_similar(&index, query, embedder_name, embedder, retrieve_vectors)
    });
    blocking_task.await?
}
/// Query-string parameters accepted by the `GET` similar-documents handler.
///
/// Keys are expected in camelCase and unknown keys are rejected
/// (`rename_all = camelCase`, `deny_unknown_fields`). The value is turned into a
/// `SimilarQuery` by the `TryFrom<SimilarQueryGet>` implementation in this file.
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct SimilarQueryGet {
/// Id of the reference document. Mandatory: this is the only field without a `default`.
#[deserr(error = DeserrQueryParamError<InvalidSimilarId>)]
id: Param<String>,
/// Pagination offset; falls back to `DEFAULT_SEARCH_OFFSET()` when omitted.
#[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSimilarOffset>)]
offset: Param<usize>,
/// Pagination limit; falls back to `DEFAULT_SEARCH_LIMIT()` when omitted.
#[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSimilarLimit>)]
limit: Param<usize>,
/// Optional list of attributes to return (presumably comma-separated, given the `CS` wrapper — TODO confirm).
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)]
attributes_to_retrieve: Option<CS<String>>,
/// Forwarded unchanged as `SimilarQuery::retrieve_vectors`.
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRetrieveVectors>)]
retrieve_vectors: Param<bool>,
/// Optional filter, kept as raw text here; the conversion to `SimilarQuery` tries to
/// parse it as JSON and otherwise wraps it in a JSON string.
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)]
filter: Option<String>,
/// Forwarded unchanged as `SimilarQuery::show_ranking_score`.
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)]
show_ranking_score: Param<bool>,
/// Forwarded unchanged as `SimilarQuery::show_ranking_score_details`.
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
show_ranking_score_details: Param<bool>,
/// Optional ranking score threshold, parsed from text by `RankingScoreThresholdGet`.
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
/// Optional embedder name, forwarded unchanged as `SimilarQuery::embedder`.
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: Option<String>,
}
/// Query-string form of a [`RankingScoreThresholdSimilar`], used by [`SimilarQueryGet`].
///
/// The raw parameter arrives as a `String` and is converted through the
/// `TryFrom<String>` implementation below.
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);

impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
    type Error = InvalidSimilarRankingScoreThreshold;

    /// Parses `s` as an `f64`, then validates it with
    /// `RankingScoreThresholdSimilar::try_from`.
    ///
    /// # Errors
    ///
    /// Returns `InvalidSimilarRankingScoreThreshold` when `s` is not a float or when the
    /// parsed value is rejected by `RankingScoreThresholdSimilar`.
    fn try_from(s: String) -> Result<Self, Self::Error> {
        let parsed = s.parse::<f64>().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
        RankingScoreThresholdSimilar::try_from(parsed).map(RankingScoreThresholdGet)
    }
}
impl TryFrom<SimilarQueryGet> for SimilarQuery {
type Error = ResponseError;
fn try_from(
SimilarQueryGet {
id,
offset,
limit,
attributes_to_retrieve,
retrieve_vectors,
filter,
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
}: SimilarQueryGet,
) -> Result<Self, Self::Error> {
let filter = match filter {
Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v),
_ => Some(Value::String(f)),
},
None => None,
};
Ok(SimilarQuery {
id: id.0.try_into().map_err(|code: InvalidSimilarId| {
ResponseError::from_msg(code.to_string(), code.error_code())
})?,
offset: offset.0,
limit: limit.0,
filter,
embedder,
attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
retrieve_vectors: retrieve_vectors.0,
show_ranking_score: show_ranking_score.0,
show_ranking_score_details: show_ranking_score_details.0,
ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
})
}
}

View File

@ -15,7 +15,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_search, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
};
use crate::search_queue::SearchQueue;
@ -67,7 +67,7 @@ pub async fn multi_search_with_post(
// Apply search rules from tenant token
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
{
add_search_rules(&mut query.filter, search_rules);
add_search_rules(&mut query, search_rules);
}
let index = index_scheduler
@ -83,14 +83,11 @@ pub async fn multi_search_with_post(
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)
.with_index(query_index)?;
let retrieve_vector =
RetrieveVectors::new(query.retrieve_vectors, features).with_index(query_index)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector)
})
.await
.with_index(query_index)?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind))
.await
.with_index(query_index)?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),

View File

@ -11,11 +11,10 @@ use indexmap::IndexMap;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
@ -60,8 +59,6 @@ pub struct SearchQuery {
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRetrieveVectors>)]
pub retrieve_vectors: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
@ -78,8 +75,6 @@ pub struct SearchQuery {
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)]
pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@ -92,44 +87,6 @@ pub struct SearchQuery {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
/// Ranking score threshold of a `SearchQuery`: a float in the inclusive range `0.0..=1.0`.
///
/// Deserialized from an `f64` through the `TryFrom<f64>` implementation below.
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThreshold(f64);

impl std::convert::TryFrom<f64> for RankingScoreThreshold {
    type Error = InvalidSearchRankingScoreThreshold;

    /// Accepts `f` only when it lies in `0.0..=1.0`.
    ///
    /// # Errors
    ///
    /// Returns `InvalidSearchRankingScoreThreshold` for any value outside that range,
    /// including NaN.
    fn try_from(f: f64) -> Result<Self, Self::Error> {
        // The range form is required for correctness, not style: every comparison with
        // NaN is false, so `f > 1.0 || f < 0.0` lets NaN through. NaN is reachable from
        // the query-string wrapper, which parses user text with `str::parse::<f64>`.
        if (0.0..=1.0).contains(&f) {
            Ok(RankingScoreThreshold(f))
        } else {
            Err(InvalidSearchRankingScoreThreshold)
        }
    }
}
/// Ranking score threshold of a `SimilarQuery`: a float in the inclusive range `0.0..=1.0`.
///
/// Deserialized from an `f64` through the `TryFrom<f64>` implementation below.
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdSimilar(f64);

impl std::convert::TryFrom<f64> for RankingScoreThresholdSimilar {
    type Error = InvalidSimilarRankingScoreThreshold;

    /// Accepts `f` only when it lies in `0.0..=1.0`.
    ///
    /// # Errors
    ///
    /// Returns `InvalidSimilarRankingScoreThreshold` for any value outside that range,
    /// including NaN.
    fn try_from(f: f64) -> Result<Self, Self::Error> {
        // The range form is required for correctness, not style: every comparison with
        // NaN is false, so `f > 1.0 || f < 0.0` lets NaN through. NaN is reachable from
        // the query-string wrapper, which parses user text with `str::parse::<f64>`.
        if (0.0..=1.0).contains(&f) {
            Ok(Self(f))
        } else {
            Err(InvalidSimilarRankingScoreThreshold)
        }
    }
}
// Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum.
@ -146,7 +103,6 @@ impl fmt::Debug for SearchQuery {
page,
hits_per_page,
attributes_to_retrieve,
retrieve_vectors,
attributes_to_crop,
crop_length,
attributes_to_highlight,
@ -155,14 +111,12 @@ impl fmt::Debug for SearchQuery {
show_ranking_score_details,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
attributes_to_search_on,
ranking_score_threshold,
} = self;
let mut debug = f.debug_struct("SearchQuery");
@ -180,9 +134,6 @@ impl fmt::Debug for SearchQuery {
if let Some(q) = q {
debug.field("q", &q);
}
if *retrieve_vectors {
debug.field("retrieve_vectors", &retrieve_vectors);
}
if let Some(v) = vector {
if v.len() < 10 {
debug.field("vector", &v);
@ -205,9 +156,6 @@ impl fmt::Debug for SearchQuery {
if let Some(sort) = sort {
debug.field("sort", &sort);
}
if let Some(distinct) = distinct {
debug.field("distinct", &distinct);
}
if let Some(facets) = facets {
debug.field("facets", &facets);
}
@ -240,9 +188,6 @@ impl fmt::Debug for SearchQuery {
debug.field("highlight_pre_tag", &highlight_pre_tag);
debug.field("highlight_post_tag", &highlight_post_tag);
debug.field("crop_marker", &crop_marker);
if let Some(ranking_score_threshold) = ranking_score_threshold {
debug.field("ranking_score_threshold", &ranking_score_threshold);
}
debug.finish()
}
@ -286,7 +231,7 @@ impl SearchKind {
Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio })
}
pub(crate) fn embedder(
fn embedder(
index_scheduler: &index_scheduler::IndexScheduler,
index: &Index,
embedder_name: Option<&str>,
@ -383,8 +328,6 @@ pub struct SearchQueryWithIndex {
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRetrieveVectors>)]
pub retrieve_vectors: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
@ -401,8 +344,6 @@ pub struct SearchQueryWithIndex {
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)]
pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@ -415,8 +356,6 @@ pub struct SearchQueryWithIndex {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
impl SearchQueryWithIndex {
@ -430,7 +369,6 @@ impl SearchQueryWithIndex {
page,
hits_per_page,
attributes_to_retrieve,
retrieve_vectors,
attributes_to_crop,
crop_length,
attributes_to_highlight,
@ -439,7 +377,6 @@ impl SearchQueryWithIndex {
show_matches_position,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
@ -447,7 +384,6 @@ impl SearchQueryWithIndex {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = self;
(
index_uid,
@ -459,7 +395,6 @@ impl SearchQueryWithIndex {
page,
hits_per_page,
attributes_to_retrieve,
retrieve_vectors,
attributes_to_crop,
crop_length,
attributes_to_highlight,
@ -468,7 +403,6 @@ impl SearchQueryWithIndex {
show_matches_position,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
@ -476,7 +410,6 @@ impl SearchQueryWithIndex {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@ -484,63 +417,6 @@ impl SearchQueryWithIndex {
}
}
/// Parameters of a similar-documents request, as deserialized from a JSON body.
///
/// Keys are expected in camelCase and unknown keys are rejected
/// (`rename_all = camelCase`, `deny_unknown_fields`).
#[derive(Debug, Clone, PartialEq, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SimilarQuery {
/// Id of the reference document. Mandatory: this is the only field without a `default`.
#[deserr(error = DeserrJsonError<InvalidSimilarId>)]
pub id: ExternalDocumentId,
/// Pagination offset; falls back to `DEFAULT_SEARCH_OFFSET()` when omitted.
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSimilarOffset>)]
pub offset: usize,
/// Pagination limit; falls back to `DEFAULT_SEARCH_LIMIT()` when omitted.
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSimilarLimit>)]
pub limit: usize,
/// Optional filter expression, kept as an arbitrary JSON value.
#[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)]
pub filter: Option<Value>,
/// Optional name of the embedder to use.
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
pub embedder: Option<String>,
/// Optional set of attributes to return.
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
/// Whether vectors were requested; uses the type's default when omitted.
#[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
pub retrieve_vectors: bool,
/// Whether the ranking score was requested; uses the type's default when omitted.
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScore>, default)]
pub show_ranking_score: bool,
/// Whether ranking score details were requested; uses the type's default when omitted.
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
/// Optional ranking score threshold, validated by `RankingScoreThresholdSimilar`.
#[deserr(default, error = DeserrJsonError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
}
#[derive(Debug, Clone, PartialEq, Deserr)]
#[deserr(try_from(Value) = TryFrom::try_from -> InvalidSimilarId)]
pub struct ExternalDocumentId(String);
impl AsRef<str> for ExternalDocumentId {
fn as_ref(&self) -> &str {
&self.0
}
}
impl ExternalDocumentId {
pub fn into_inner(self) -> String {
self.0
}
}
impl TryFrom<String> for ExternalDocumentId {
type Error = InvalidSimilarId;
fn try_from(value: String) -> Result<Self, Self::Error> {
serde_json::Value::String(value).try_into()
}
}
impl TryFrom<Value> for ExternalDocumentId {
type Error = InvalidSimilarId;
fn try_from(value: Value) -> Result<Self, Self::Error> {
Ok(Self(milli::documents::validate_document_id_value(value).map_err(|_| InvalidSimilarId)?))
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
@ -548,8 +424,6 @@ pub enum MatchingStrategy {
Last,
/// All query words are mandatory
All,
/// Remove query words from the most frequent to the least
Frequency,
}
impl Default for MatchingStrategy {
@ -563,7 +437,6 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
match other {
MatchingStrategy::Last => Self::Last,
MatchingStrategy::All => Self::All,
MatchingStrategy::Frequency => Self::Frequency,
}
}
}
@ -665,16 +538,6 @@ impl fmt::Debug for SearchResult {
}
}
/// Response payload of a similar-documents request.
///
/// Serialized with camelCase keys; the fields of `hits_info` are flattened into the
/// top-level object rather than nested under their own key.
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SimilarResult {
/// Returned documents.
pub hits: Vec<SearchHit>,
/// Document id associated with the request (presumably the reference document — TODO confirm against `perform_similar`).
pub id: String,
/// Processing time, in milliseconds.
pub processing_time_ms: u128,
/// Pagination information, merged into the parent object on serialization.
#[serde(flatten)]
pub hits_info: HitsInfo,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultWithIndex {
@ -707,8 +570,8 @@ pub struct FacetSearchResult {
}
/// Incorporate search rules in search query
pub fn add_search_rules(filter: &mut Option<Value>, rules: IndexSearchRules) {
*filter = match (filter.take(), rules.filter) {
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
(None, rules_filter) => rules_filter,
(filter, None) => filter,
(Some(filter), Some(rules_filter)) => {
@ -735,13 +598,6 @@ fn prepare_search<'t>(
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn);
search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
search.ranking_score_threshold(ranking_score_threshold.0);
}
if let Some(distinct) = &query.distinct {
search.distinct(distinct.clone());
}
match search_kind {
SearchKind::KeywordOnly => {
@ -783,16 +639,11 @@ fn prepare_search<'t>(
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
search.scoring_strategy(
if query.show_ranking_score
|| query.show_ranking_score_details
|| query.ranking_score_threshold.is_some()
{
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
},
);
search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
});
// compute the offset on the limit depending on the pagination mode.
let (offset, limit) = if is_finite_pagination {
@ -837,7 +688,6 @@ pub fn perform_search(
index: &Index,
query: SearchQuery,
search_kind: SearchKind,
retrieve_vectors: RetrieveVectors,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
@ -869,57 +719,131 @@ pub fn perform_search(
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
};
let SearchQuery {
q,
limit,
page,
hits_per_page,
attributes_to_retrieve,
// use the enum passed as parameter
retrieve_vectors: _,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
show_ranking_score,
show_ranking_score_details,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
// already used in prepare_search
vector: _,
hybrid: _,
offset: _,
ranking_score_threshold: _,
matching_strategy: _,
attributes_to_search_on: _,
filter: _,
distinct: _,
} = query;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let format = AttributesFormat {
attributes_to_retrieve,
retrieve_vectors,
attributes_to_highlight,
attributes_to_crop,
crop_length,
crop_marker,
highlight_pre_tag,
highlight_post_tag,
show_matches_position,
sort,
show_ranking_score,
show_ranking_score_details,
let displayed_ids = index
.displayed_fields_ids(&rtxn)?
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
let fids = |attrs: &BTreeSet<String>| {
let mut ids = BTreeSet::new();
for attr in attrs {
if attr == "*" {
ids = displayed_ids.clone();
break;
}
if let Some(id) = fields_ids_map.id(attr) {
ids.insert(id);
}
}
ids
};
let documents =
make_hits(index, &rtxn, format, matching_words, documents_ids, document_scores)?;
// The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
// but these attributes must also be present
// - in the fields_ids_map
// - in the displayed attributes
let to_retrieve_ids: BTreeSet<_> = query
.attributes_to_retrieve
.as_ref()
.map(fids)
.unwrap_or_else(|| displayed_ids.clone())
.intersection(&displayed_ids)
.cloned()
.collect();
let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default();
let attr_to_crop = query.attributes_to_crop.unwrap_or_default();
// Attributes in `formatted_options` correspond to the attributes that will be in `_formatted`
// These attributes are:
// - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
// - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
// But these attributes must be also present in displayed attributes
let formatted_options = compute_formatted_options(
&attr_to_highlight,
&attr_to_crop,
query.crop_length,
&to_retrieve_ids,
&fields_ids_map,
&displayed_ids,
);
let mut tokenizer_builder = TokenizerBuilder::default();
tokenizer_builder.create_char_map(true);
let script_lang_map = index.script_language(&rtxn)?;
if !script_lang_map.is_empty() {
tokenizer_builder.allow_list(&script_lang_map);
}
let separators = index.allowed_separators(&rtxn)?;
let separators: Option<Vec<_>> =
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref separators) = separators {
tokenizer_builder.separators(separators);
}
let dictionary = index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref dictionary) = dictionary {
tokenizer_builder.words_dict(dictionary);
}
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
formatter_builder.crop_marker(query.crop_marker);
formatter_builder.highlight_prefix(query.highlight_pre_tag);
formatter_builder.highlight_suffix(query.highlight_post_tag);
let mut documents = Vec::new();
let documents_iter = index.documents(&rtxn, documents_ids)?;
for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
// First generate a document with all the displayed fields
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
// select the attributes to retrieve
let attributes_to_retrieve = to_retrieve_ids
.iter()
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
let mut document =
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
let (matches_position, formatted) = format_fields(
&displayed_document,
&fields_ids_map,
&formatter_builder,
&formatted_options,
query.show_matches_position,
&displayed_ids,
)?;
if let Some(sort) = query.sort.as_ref() {
insert_geo_distance(sort, &mut document);
}
let ranking_score =
query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
let ranking_score_details =
query.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
let hit = SearchHit {
document,
formatted,
matches_position,
ranking_score_details,
ranking_score,
};
documents.push(hit);
}
let number_of_hits = min(candidates.len() as usize, max_total_hits);
let hits_info = if is_finite_pagination {
let hits_per_page = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
// If hit_per_page is 0, then pages can't be computed and so we respond 0.
let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
.checked_div(hits_per_page)
@ -927,15 +851,15 @@ pub fn perform_search(
HitsInfo::Pagination {
hits_per_page,
page: page.unwrap_or(1),
page: query.page.unwrap_or(1),
total_pages,
total_hits: number_of_hits,
}
} else {
HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits }
HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
};
let (facet_distribution, facet_stats) = match facets {
let (facet_distribution, facet_stats) = match query.facets {
Some(ref fields) => {
let mut facet_distribution = index.facets_distribution(&rtxn);
@ -972,7 +896,7 @@ pub fn perform_search(
let result = SearchResult {
hits: documents,
hits_info,
query: q.unwrap_or_default(),
query: query.q.unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(),
facet_distribution,
facet_stats,
@ -983,214 +907,6 @@ pub fn perform_search(
Ok(result)
}
/// Formatting options consumed by `make_hits` when turning raw documents into hits.
struct AttributesFormat {
// Attributes to keep in each hit; `None` means every displayed attribute.
attributes_to_retrieve: Option<BTreeSet<String>>,
// How the `_vectors` field must be handled (see `RetrieveVectors`).
retrieve_vectors: RetrieveVectors,
// Attributes to highlight in the formatted output; `None` is treated as empty.
attributes_to_highlight: Option<HashSet<String>>,
// Attributes to crop in the formatted output; `None` is treated as empty.
attributes_to_crop: Option<Vec<String>>,
// Crop length forwarded to `compute_formatted_options`.
crop_length: usize,
// Marker handed to the formatter via `crop_marker`.
crop_marker: String,
// Tag handed to the formatter via `highlight_prefix`.
highlight_pre_tag: String,
// Tag handed to the formatter via `highlight_suffix`.
highlight_post_tag: String,
// Forwarded to `format_fields` to request the matches positions.
show_matches_position: bool,
// Sort criteria; when present they are passed to `insert_geo_distance` for each hit.
sort: Option<Vec<String>>,
// Whether each hit carries its global ranking score.
show_ranking_score: bool,
// Whether each hit carries its detailed ranking score map.
show_ranking_score_details: bool,
}
/// How the `_vectors` field of a document must be handled when building a response.
///
/// Built with `RetrieveVectors::new` from the `retrieveVectors` request parameter
/// and the state of the vector store feature.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RetrieveVectors {
/// Do not touch the `_vectors` field
///
/// this is the behavior when the vectorStore feature is disabled
Ignore,
/// Remove the `_vectors` field
///
/// this is the behavior when the vectorStore feature is enabled, and `retrieveVectors` is `false`
Hide,
/// Retrieve vectors from the DB and merge them into the `_vectors` field
///
/// this is the behavior when the vectorStore feature is enabled, and `retrieveVectors` is `true`
Retrieve,
}
impl RetrieveVectors {
    /// Picks the vector handling mode from the request flag and the feature state.
    ///
    /// - flag set, feature check passes: `Retrieve`
    /// - flag set, feature check fails: the feature error is returned
    /// - flag unset, feature check passes: `Hide`
    /// - flag unset, feature check fails: `Ignore`
    ///
    /// # Errors
    ///
    /// Returns the error produced by `features.check_vector` when vectors are
    /// requested while that check fails.
    pub fn new(
        retrieve_vector: bool,
        features: index_scheduler::RoFeatures,
    ) -> Result<Self, index_scheduler::Error> {
        let feature_check = features.check_vector("Passing `retrieveVectors` as a parameter");

        if retrieve_vector {
            // Explicitly asking for vectors requires the feature: surface the error as is.
            return feature_check.map(|()| Self::Retrieve);
        }

        // Vectors were not requested, so a failed check is not an error here.
        if feature_check.is_ok() {
            Ok(Self::Hide)
        } else {
            Ok(Self::Ignore)
        }
    }
}
/// Builds the hits returned to the user for the given documents.
///
/// For every id in `documents_ids`, paired with the matching entry of
/// `document_scores`, this function:
/// - rebuilds the document restricted to the displayed attributes,
/// - keeps only the attributes selected by `format.attributes_to_retrieve`
///   (all displayed attributes when `None`),
/// - hides, keeps, or injects the `_vectors` field according to
///   `format.retrieve_vectors` and the displayed attributes,
/// - computes the formatted document and the matches positions,
/// - calls `insert_geo_distance` when sort criteria are provided,
/// - attaches the ranking score and its details when requested.
///
/// # Errors
///
/// Propagates errors from reading the index and from serializing embeddings.
///
/// # Panics
///
/// Panics if the fields ids map cannot be read, or if a field id selected for
/// retrieval has no name in that map.
fn make_hits(
    index: &Index,
    rtxn: &RoTxn<'_>,
    format: AttributesFormat,
    matching_words: milli::MatchingWords,
    documents_ids: Vec<u32>,
    document_scores: Vec<Vec<ScoreDetails>>,
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
    let fields_ids_map = index.fields_ids_map(rtxn).unwrap();
    let displayed_ids =
        index.displayed_fields_ids(rtxn)?.map(|fields| fields.into_iter().collect::<BTreeSet<_>>());

    let vectors_fid = fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);

    let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
        // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
        (None, _) => false,
        // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
        (Some(_), None) => true,
        // displayed_ids is a finite list, so hide if `_vectors` is not part of it
        (Some(map), Some(vectors_fid)) => !map.contains(&vectors_fid),
    };

    // A request to retrieve vectors is downgraded to `Hide` when the displayed
    // attributes do not allow `_vectors` to be shown.
    let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
        if vectors_is_hidden {
            RetrieveVectors::Hide
        } else {
            RetrieveVectors::Retrieve
        }
    } else {
        format.retrieve_vectors
    };

    let displayed_ids =
        displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());

    // Resolves attribute names to field ids; `*` expands to every displayed field
    // and unknown names are ignored.
    let fids = |attrs: &BTreeSet<String>| {
        let mut ids = BTreeSet::new();
        for attr in attrs {
            if attr == "*" {
                ids.clone_from(&displayed_ids);
                break;
            }

            if let Some(id) = fields_ids_map.id(attr) {
                ids.insert(id);
            }
        }
        ids
    };

    // Only attributes that are both requested and displayed can be retrieved.
    let to_retrieve_ids: BTreeSet<_> = format
        .attributes_to_retrieve
        .as_ref()
        .map(fids)
        .unwrap_or_else(|| displayed_ids.clone())
        .intersection(&displayed_ids)
        .cloned()
        .collect();

    let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default();
    let attr_to_crop = format.attributes_to_crop.unwrap_or_default();

    let formatted_options = compute_formatted_options(
        &attr_to_highlight,
        &attr_to_crop,
        format.crop_length,
        &to_retrieve_ids,
        &fields_ids_map,
        &displayed_ids,
    );

    let mut tokenizer_builder = TokenizerBuilder::default();
    tokenizer_builder.create_char_map(true);

    let script_lang_map = index.script_language(rtxn)?;
    if !script_lang_map.is_empty() {
        tokenizer_builder.allow_list(&script_lang_map);
    }

    let separators = index.allowed_separators(rtxn)?;
    let separators: Option<Vec<_>> =
        separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref separators) = separators {
        tokenizer_builder.separators(separators);
    }

    let dictionary = index.dictionary(rtxn)?;
    let dictionary: Option<Vec<_>> =
        dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref dictionary) = dictionary {
        tokenizer_builder.words_dict(dictionary);
    }

    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
    formatter_builder.crop_marker(format.crop_marker);
    formatter_builder.highlight_prefix(format.highlight_pre_tag);
    formatter_builder.highlight_suffix(format.highlight_post_tag);

    let mut documents = Vec::new();
    let embedding_configs = index.embedding_configs(rtxn)?;
    let documents_iter = index.documents(rtxn, documents_ids)?;

    // Does not depend on the current document, so it is computed once.
    let add_vectors_fid = vectors_fid.filter(|_fid| retrieve_vectors == RetrieveVectors::Retrieve);

    for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
        // First generate a document with all the displayed fields
        let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;

        // select the attributes to retrieve
        let attributes_to_retrieve = to_retrieve_ids
            .iter()
            // skip the vectors_fid if RetrieveVectors::Hide
            .filter(|fid| match vectors_fid {
                Some(vectors_fid) => {
                    !(retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
                }
                None => true,
            })
            // need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
            .chain(add_vectors_fid.iter())
            .map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
        let mut document =
            permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);

        if retrieve_vectors == RetrieveVectors::Retrieve {
            // Start from the `_vectors` object already in the document, if any,
            // then add or overwrite the entries for the embeddings stored in the index.
            let mut vectors = match document.remove("_vectors") {
                Some(Value::Object(map)) => map,
                _ => Default::default(),
            };
            for (name, vector) in index.embeddings(rtxn, id)? {
                let user_provided = embedding_configs
                    .iter()
                    .find(|conf| conf.name == name)
                    .is_some_and(|conf| conf.user_provided.contains(id));
                let embeddings =
                    ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided };
                vectors.insert(name, serde_json::to_value(embeddings)?);
            }
            document.insert("_vectors".into(), vectors.into());
        }

        let (matches_position, formatted) = format_fields(
            &displayed_document,
            &fields_ids_map,
            &formatter_builder,
            &formatted_options,
            format.show_matches_position,
            &displayed_ids,
        )?;

        if let Some(sort) = format.sort.as_ref() {
            insert_geo_distance(sort, &mut document);
        }

        let ranking_score =
            format.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
        let ranking_score_details =
            format.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));

        let hit = SearchHit {
            document,
            formatted,
            matches_position,
            ranking_score_details,
            ranking_score,
        };
        documents.push(hit);
    }
    Ok(documents)
}
pub fn perform_facet_search(
index: &Index,
search_query: SearchQuery,
@ -1225,103 +941,6 @@ pub fn perform_facet_search(
})
}
/// Runs a similar-documents request against `index` and builds the response.
///
/// Steps, in order:
/// 1. resolve the external document id of the query to its internal id,
/// 2. configure a `milli::Similar` with the offset, limit, embedder, optional
///    filter and optional ranking score threshold,
/// 3. execute it and turn the resulting documents into hits with `make_hits`,
///    using the default crop/highlight settings and no matching words,
/// 4. report the number of candidates, capped by the index's pagination
///    `max_total_hits` (or `DEFAULT_PAGINATION_MAX_TOTAL_HITS` when unset).
///
/// # Errors
///
/// - `Code::NotFoundSimilarId` when the document id is unknown to the index.
/// - `Code::InvalidSimilarFilter` when the filter cannot be parsed or is
///   rejected by milli as an invalid filter.
/// - Any other error from the index, the search, or hit formatting.
pub fn perform_similar(
index: &Index,
query: SimilarQuery,
embedder_name: String,
embedder: Arc<Embedder>,
retrieve_vectors: RetrieveVectors,
) -> Result<SimilarResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
// `filter` is only ignored by this pattern (not moved out); it is read
// again through `query.filter` below.
// The `retrieve_vectors` field is ignored in favor of the function parameter.
let SimilarQuery {
id,
offset,
limit,
filter: _,
embedder: _,
attributes_to_retrieve,
retrieve_vectors: _,
show_ranking_score,
show_ranking_score_details,
ranking_score_threshold,
} = query;
// using let-else rather than `?` so that the borrow checker identifies we're always returning here,
// preventing a use-after-move
let Some(internal_id) = index.external_documents_ids().get(&rtxn, &id)? else {
return Err(ResponseError::from_msg(
MeilisearchHttpError::DocumentNotFound(id.into_inner()).to_string(),
Code::NotFoundSimilarId,
));
};
let mut similar =
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);
if let Some(ref filter) = query.filter {
if let Some(facets) = parse_filter(filter)
// inject InvalidSimilarFilter code
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::InvalidSimilarFilter))?
{
similar.filter(facets);
}
}
if let Some(ranking_score_threshold) = ranking_score_threshold {
similar.ranking_score_threshold(ranking_score_threshold.0);
}
let milli::SearchResult {
documents_ids,
matching_words: _,
candidates,
document_scores,
degraded: _,
used_negative_operator: _,
} = similar.execute().map_err(|err| match err {
// invalid filters detected at execution time get the same error code as parse failures
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
ResponseError::from_msg(err.to_string(), Code::InvalidSimilarFilter)
}
err => err.into(),
})?;
// No highlighting, cropping, matches position or sort here: those options
// are filled with `None`, `false` or the default values.
let format = AttributesFormat {
attributes_to_retrieve,
retrieve_vectors,
attributes_to_highlight: None,
attributes_to_crop: None,
crop_length: DEFAULT_CROP_LENGTH(),
crop_marker: DEFAULT_CROP_MARKER(),
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
show_matches_position: false,
sort: None,
show_ranking_score,
show_ranking_score_details,
};
let hits = make_hits(index, &rtxn, format, Default::default(), documents_ids, document_scores)?;
let max_total_hits = index
.pagination_max_total_hits(&rtxn)
.map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
// never report more hits than the pagination limit allows
let number_of_hits = min(candidates.len() as usize, max_total_hits);
let hits_info = HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits };
let result = SimilarResult {
hits,
hits_info,
id: id.into_inner(),
processing_time_ms: before_search.elapsed().as_millis(),
};
Ok(result)
}
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
lazy_static::lazy_static! {
static ref GEO_REGEX: Regex =

View File

@ -40,9 +40,8 @@ pub struct Permit {
impl Drop for Permit {
fn drop(&mut self) {
let sender = self.sender.clone();
// if the channel is closed then the whole instance is down
std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
let _ = futures::executor::block_on(self.sender.send(()));
}
}
@ -86,13 +85,8 @@ impl SearchQueue {
},
search_request = receive_new_searches.recv() => {
let search_request = match search_request {
Some(search_request) => search_request,
// This should never happen while actix-web is running, but it's not a reason to crash
// and it can generate a lot of noise in the tests.
None => continue,
};
// this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web
let search_request = search_request.unwrap();
if searches_running < usize::from(parallelism) && queue.is_empty() {
searches_running += 1;
// if the search requests die it's not a hard error on our side

View File

@ -182,10 +182,14 @@ impl Index<'_> {
self.service.get(url).await
}
pub async fn get_document(&self, id: u64, options: Option<Value>) -> (Value, StatusCode) {
pub async fn get_document(
&self,
id: u64,
options: Option<GetDocumentOptions>,
) -> (Value, StatusCode) {
let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id);
if let Some(options) = options {
write!(url, "?{}", yaup::to_string(&options).unwrap()).unwrap();
if let Some(fields) = options.and_then(|o| o.fields) {
let _ = write!(url, "?fields={}", fields.join(","));
}
self.service.get(url).await
}
@ -201,11 +205,18 @@ impl Index<'_> {
}
pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) {
let url = format!(
"/indexes/{}/documents?{}",
urlencode(self.uid.as_ref()),
yaup::to_string(&options).unwrap()
);
let mut url = format!("/indexes/{}/documents?", urlencode(self.uid.as_ref()));
if let Some(limit) = options.limit {
let _ = write!(url, "limit={}&", limit);
}
if let Some(offset) = options.offset {
let _ = write!(url, "offset={}&", offset);
}
if let Some(attributes_to_retrieve) = options.attributes_to_retrieve {
let _ = write!(url, "fields={}&", attributes_to_retrieve.join(","));
}
self.service.get(url).await
}
@ -369,43 +380,6 @@ impl Index<'_> {
self.service.get(url).await
}
/// Performs both GET and POST similar queries
///
/// The same `query` is sent once as a POST body and once as a GET query string
/// (serialized with `yaup`), and `test` is run on each response, POST first.
///
/// A panic raised by `test` is caught so that a line telling which of the two
/// requests failed can be printed to stderr, then the panic is resumed.
///
/// # Panics
///
/// Panics if `query` cannot be serialized to a query string, or if `test`
/// panics on either response.
pub async fn similar(
&self,
query: Value,
test: impl Fn(Value, StatusCode) + UnwindSafe + Clone,
) {
let post = self.similar_post(query.clone()).await;
let query = yaup::to_string(&query).unwrap();
let get = self.similar_get(&query).await;
insta::allow_duplicates! {
let (response, code) = post;
let t = test.clone();
if let Err(e) = catch_unwind(move || t(response, code)) {
eprintln!("Error with post search");
resume_unwind(e);
}
let (response, code) = get;
if let Err(e) = catch_unwind(move || test(response, code)) {
eprintln!("Error with get search");
resume_unwind(e);
}
}
}
/// Sends `query` as the body of a POST request to this index's `similar` route,
/// encoded with the index's encoder.
pub async fn similar_post(&self, query: Value) -> (Value, StatusCode) {
    let index_uid = urlencode(self.uid.as_ref());
    let route = format!("/indexes/{}/similar", index_uid);
    self.service.post_encoded(route, query, self.encoder).await
}
/// Sends a GET request to this index's `similar` route.
///
/// `query` is appended verbatim after the `?`, so it must already be a
/// well-formed query string.
pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) {
    let index_uid = urlencode(self.uid.as_ref());
    let route = format!("/indexes/{}/similar?{}", index_uid, query);
    self.service.get(route).await
}
pub async fn facet_search(&self, query: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/facet-search", urlencode(self.uid.as_ref()));
self.service.post_encoded(url, query, self.encoder).await
@ -424,11 +398,13 @@ impl Index<'_> {
}
}
#[derive(Debug, Default, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GetDocumentOptions {
pub fields: Option<Vec<&'static str>>,
}
#[derive(Debug, Default)]
pub struct GetAllDocumentsOptions {
pub limit: Option<usize>,
pub offset: Option<usize>,
pub retrieve_vectors: bool,
pub fields: Option<Vec<&'static str>>,
pub attributes_to_retrieve: Option<Vec<&'static str>>,
}

View File

@ -6,7 +6,7 @@ pub mod service;
use std::fmt::{self, Display};
#[allow(unused)]
pub use index::GetAllDocumentsOptions;
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
use meili_snap::json_string;
use serde::{Deserialize, Serialize};
#[allow(unused)]
@ -65,14 +65,7 @@ impl Display for Value {
write!(
f,
"{}",
json_string!(self, {
".enqueuedAt" => "[date]",
".startedAt" => "[date]",
".finishedAt" => "[date]",
".duration" => "[duration]",
".processingTimeMs" => "[duration]",
".details.embedders.*.url" => "[url]"
})
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
)
}
}

View File

@ -795,70 +795,3 @@ async fn fetch_document_by_filter() {
}
"###);
}
// Checks the errors returned for the `retrieveVectors` parameter on the three
// document routes (get all documents, fetch by filter, get one document):
// - a value that is not a boolean is rejected with `invalid_document_retrieve_vectors`,
// - `true` is rejected with `feature_not_enabled`, since this test never enables
//   the `vector store` experimental feature.
#[actix_rt::test]
async fn retrieve_vectors() {
let server = Server::new().await;
let index = server.index("doggo");
// GET ALL DOCUMENTS BY QUERY
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=tamo").await;
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
"code": "invalid_document_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors"
}
"###);
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=true").await;
snapshot!(json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// FETCH ALL DOCUMENTS BY POST
let (response, _code) =
index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",
"code": "invalid_document_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors"
}
"###);
let (response, _code) = index.get_document_by_filter(json!({ "retrieveVectors": true })).await;
snapshot!(json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// GET A SINGLE DOCUMENT
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": "tamo"}))).await;
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
"code": "invalid_document_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors"
}
"###);
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await;
snapshot!(json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}

View File

@ -4,7 +4,7 @@ use meili_snap::*;
use urlencoding::encode as urlencode;
use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, Server, Value};
use crate::common::{GetAllDocumentsOptions, GetDocumentOptions, Server, Value};
use crate::json;
// TODO: partial test since we are testing error, and error is not yet fully implemented in
@ -59,7 +59,8 @@ async fn get_document() {
})
);
let (response, code) = index.get_document(0, Some(json!({ "fields": ["id"] }))).await;
let (response, code) =
index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["id"]) })).await;
assert_eq!(code, 200);
assert_eq!(
response,
@ -68,8 +69,9 @@ async fn get_document() {
})
);
let (response, code) =
index.get_document(0, Some(json!({ "fields": ["nested.content"] }))).await;
let (response, code) = index
.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["nested.content"]) }))
.await;
assert_eq!(code, 200);
assert_eq!(
response,
@ -209,7 +211,7 @@ async fn test_get_all_documents_attributes_to_retrieve() {
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["name"]),
attributes_to_retrieve: Some(vec!["name"]),
..Default::default()
})
.await;
@ -223,19 +225,9 @@ async fn test_get_all_documents_attributes_to_retrieve() {
assert_eq!(response["limit"], json!(20));
assert_eq!(response["total"], json!(77));
let (response, code) = index.get_all_documents_raw("?fields=").await;
assert_eq!(code, 200);
assert_eq!(response["results"].as_array().unwrap().len(), 20);
for results in response["results"].as_array().unwrap() {
assert_eq!(results.as_object().unwrap().keys().count(), 0);
}
assert_eq!(response["offset"], json!(0));
assert_eq!(response["limit"], json!(20));
assert_eq!(response["total"], json!(77));
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["wrong"]),
attributes_to_retrieve: Some(vec![]),
..Default::default()
})
.await;
@ -250,7 +242,22 @@ async fn test_get_all_documents_attributes_to_retrieve() {
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["name", "tags"]),
attributes_to_retrieve: Some(vec!["wrong"]),
..Default::default()
})
.await;
assert_eq!(code, 200);
assert_eq!(response["results"].as_array().unwrap().len(), 20);
for results in response["results"].as_array().unwrap() {
assert_eq!(results.as_object().unwrap().keys().count(), 0);
}
assert_eq!(response["offset"], json!(0));
assert_eq!(response["limit"], json!(20));
assert_eq!(response["total"], json!(77));
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
attributes_to_retrieve: Some(vec!["name", "tags"]),
..Default::default()
})
.await;
@ -263,7 +270,10 @@ async fn test_get_all_documents_attributes_to_retrieve() {
}
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions { fields: Some(vec!["*"]), ..Default::default() })
.get_all_documents(GetAllDocumentsOptions {
attributes_to_retrieve: Some(vec!["*"]),
..Default::default()
})
.await;
assert_eq!(code, 200);
assert_eq!(response["results"].as_array().unwrap().len(), 20);
@ -273,7 +283,7 @@ async fn test_get_all_documents_attributes_to_retrieve() {
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["*", "wrong"]),
attributes_to_retrieve: Some(vec!["*", "wrong"]),
..Default::default()
})
.await;
@ -306,10 +316,12 @@ async fn get_document_s_nested_attributes_to_retrieve() {
assert_eq!(code, 202);
index.wait_task(1).await;
let (response, code) = index.get_document(0, Some(json!({ "fields": ["content"] }))).await;
let (response, code) =
index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await;
assert_eq!(code, 200);
assert_eq!(response, json!({}));
let (response, code) = index.get_document(1, Some(json!({ "fields": ["content"] }))).await;
let (response, code) =
index.get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await;
assert_eq!(code, 200);
assert_eq!(
response,
@ -321,7 +333,9 @@ async fn get_document_s_nested_attributes_to_retrieve() {
})
);
let (response, code) = index.get_document(0, Some(json!({ "fields": ["content.truc"] }))).await;
let (response, code) = index
.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) }))
.await;
assert_eq!(code, 200);
assert_eq!(
response,
@ -329,7 +343,9 @@ async fn get_document_s_nested_attributes_to_retrieve() {
"content.truc": "foobar",
})
);
let (response, code) = index.get_document(1, Some(json!({ "fields": ["content.truc"] }))).await;
let (response, code) = index
.get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) }))
.await;
assert_eq!(code, 200);
assert_eq!(
response,
@ -524,207 +540,3 @@ async fn get_document_by_filter() {
}
"###);
}
// Checks how the `_vectors` field is exposed by the document routes once the
// `vectorStore` feature is enabled and a `userProvided` embedder is configured:
// - it is absent by default, even when `_vectors` is listed in `fields`,
// - it is returned when `retrieveVectors` is true, with or without `fields`.
#[actix_rt::test]
async fn get_document_with_vectors() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
// one document provides a vector for the embedder, the other provides `null`
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": null }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
// by default you shouldn't see the `_vectors` object
let (documents, _code) = index.get_all_documents(Default::default()).await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir"
},
{
"id": 1,
"name": "echo"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (documents, _code) = index.get_document(0, None).await;
snapshot!(json_string!(documents), @r###"
{
"id": 0,
"name": "kefir"
}
"###);
// if we try to retrieve the vectors with the `fields` parameter they
// still shouldn't be displayed
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["name", "_vectors"]),
..Default::default()
})
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"name": "kefir"
},
{
"name": "echo"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (documents, _code) =
index.get_document(0, Some(json!({"fields": ["name", "_vectors"]}))).await;
snapshot!(json_string!(documents), @r###"
{
"name": "kefir"
}
"###);
// If we specify the retrieve vectors boolean and nothing else we should get the vectors
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (documents, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await;
snapshot!(json_string!(documents), @r###"
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
}
"###);
// If we specify the retrieve vectors boolean and exclude vectors from the `fields` we should still get the vectors
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions {
retrieve_vectors: true,
fields: Some(vec!["name"]),
..Default::default()
})
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
},
{
"name": "echo",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (documents, _code) =
index.get_document(0, Some(json!({"retrieveVectors": true, "fields": ["name"]}))).await;
snapshot!(json_string!(documents), @r###"
{
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
}
"###);
}

View File

@ -1859,7 +1859,8 @@ async fn import_dump_v6_containing_experimental_features() {
{
"vectorStore": false,
"metrics": false,
"logsRoute": false
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -1938,210 +1939,3 @@ async fn import_dump_v6_containing_experimental_features() {
})
.await;
}
// In this test we must generate the dump ourselves to ensure the
// `user provided` vectors are well set
//
// Flow of the test:
// 1. start a server, enable the `vectorStore` feature and configure a `huggingFace`
//    embedder named `doggo_embedder` on the `pets` index;
// 2. add five documents, each using a different shape for `_vectors.doggo_embedder`;
// 3. create a dump, drop the server and remove its database directory;
// 4. start a second server with `import_dump` pointing at that dump and check the index
//    list, the experimental features, the settings and the vectors returned by a search.
//
// The test is ignored on Windows.
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn generate_and_import_dump_containing_vectors() {
let temp = tempfile::tempdir().unwrap();
let mut opt = default_settings(temp.path());
let server = Server::new_with_options(opt.clone()).await.unwrap();
// NOTE(review): other tests in this suite destructure `set_features` as `(response, code)`.
// Here the first element is bound to `code`, yet the snapshot below is the features
// object, so the binding name looks misleading — confirm and rename if so.
let (code, _) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let index = server.index("pets");
// Configure the embedder whose vectors are expected to survive the dump round-trip.
let (response, code) = index
.update_settings(json!(
{
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}}",
}
}
}
))
.await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
// No inline expectation here; presumably compared against a stored snapshot file.
snapshot!(response);
// The five documents cover these `_vectors.doggo_embedder` shapes:
// - id 0: a bare array of 384 values;
// - id 1: an object with `regenerate: false` and explicit embeddings;
// - id 2: an object with `regenerate: true` and explicit embeddings;
// - id 3: an object with `regenerate: true` and no embeddings;
// - id 4: no `_vectors` field at all.
let (response, code) = index
.add_documents(
json!([
{"id": 0, "doggo": "kefir", "_vectors": { "doggo_embedder": vec![0; 384] }},
{"id": 1, "doggo": "echo", "_vectors": { "doggo_embedder": { "regenerate": false, "embeddings": vec![1; 384] }}},
{"id": 2, "doggo": "intel", "_vectors": { "doggo_embedder": { "regenerate": true, "embeddings": vec![2; 384] }}},
{"id": 3, "doggo": "bill", "_vectors": { "doggo_embedder": { "regenerate": true }}},
{"id": 4, "doggo": "max" },
]),
None,
)
.await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
// No inline expectation here; presumably compared against a stored snapshot file.
snapshot!(response);
let (response, code) = server.create_dump().await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(response["status"], @r###""succeeded""###);
// ========= We made a dump, now we should clear the DB and try to import our dump
drop(server);
tokio::fs::remove_dir_all(&opt.db_path).await.unwrap();
// The dump file name is built from the `dumpUid` reported in the finished dump task.
let dump_name = format!("{}.dump", response["details"]["dumpUid"].as_str().unwrap());
let dump_path = opt.dump_dir.join(dump_name);
assert!(dump_path.exists(), "path: `{}`", dump_path.display());
opt.import_dump = Some(dump_path);
// NOTE: We shouldn't have to change the database path but I lost one hour
// because of a « bad path » error and that fixed it.
opt.db_path = temp.path().join("data.ms");
let mut server = Server::new_auth_with_options(opt, temp).await;
server.use_api_key("MASTER_KEY");
// The imported instance must expose exactly one index: `pets`, with primary key `id`.
let (indexes, code) = server.list_indexes(None, None).await;
assert_eq!(code, 200, "{indexes}");
snapshot!(indexes["results"].as_array().unwrap().len(), @"1");
snapshot!(indexes["results"][0]["uid"], @r###""pets""###);
snapshot!(indexes["results"][0]["primaryKey"], @r###""id""###);
// The `vectorStore` feature is expected to still be enabled after the import.
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let index = server.index("pets");
// The embedder configuration is expected to be restored alongside the default settings.
let (response, code) = index.settings().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"nonSeparatorTokens": [],
"separatorTokens": [],
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100,
"sortFacetValuesBy": {
"*": "alpha"
}
},
"pagination": {
"maxTotalHits": 1000
},
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}}"
}
},
"searchCutoffMs": null
}
"###);
// The embedding values are redacted to `[vector]`, so only the presence of the embeddings
// and the `regenerate` flag are compared. Expected flags: `false` for ids 0 and 1,
// `true` for ids 2, 3 and 4.
index
.search(json!({"retrieveVectors": true}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"], { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###"
[
{
"id": 0,
"doggo": "kefir",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": false
}
}
},
{
"id": 1,
"doggo": "echo",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": false
}
}
},
{
"id": 2,
"doggo": "intel",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": true
}
}
},
{
"id": 3,
"doggo": "bill",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": true
}
}
},
{
"id": 4,
"doggo": "max",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": true
}
}
}
]
"###);
})
.await;
}

View File

@ -1,25 +0,0 @@
---
source: meilisearch/tests/dumps/mod.rs
---
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}}"
}
}
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@ -1,19 +0,0 @@
---
source: meilisearch/tests/dumps/mod.rs
---
{
"uid": 1,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 5,
"indexedDocuments": 5
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@ -20,7 +20,8 @@ async fn experimental_features() {
{
"vectorStore": false,
"metrics": false,
"logsRoute": false
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -31,7 +32,8 @@ async fn experimental_features() {
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -42,7 +44,8 @@ async fn experimental_features() {
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -54,7 +57,8 @@ async fn experimental_features() {
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -66,7 +70,8 @@ async fn experimental_features() {
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
"logsRoute": false,
"exportPuffinReports": false
}
"###);
}
@ -85,7 +90,8 @@ async fn experimental_feature_metrics() {
{
"vectorStore": false,
"metrics": true,
"logsRoute": false
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -140,7 +146,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`",
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -8,12 +8,10 @@ mod index;
mod logs;
mod search;
mod settings;
mod similar;
mod snapshot;
mod stats;
mod swap_indexes;
mod tasks;
mod vector;
// Tests are isolated by features in different modules to allow better readability, test
// targetability, and improved incremental compilation times.

View File

@ -107,39 +107,6 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
])
});
// Four product documents carrying a nested `color` object (`main` and `pattern`).
// Ids 1 to 3 share `product_id` "123456"; ids 3 and 4 share `color.main` "Blue".
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 1,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": { "main": "Brown", "pattern": "stripped" },
},
{
"id": 2,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": { "main": "Black", "pattern": "stripped" },
},
{
"id": 3,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": { "main": "Blue", "pattern": "used" },
},
{
"id": 4,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": { "main": "Blue", "pattern": "stripped" },
}
])
});
static DOCUMENT_PRIMARY_KEY: &str = "id";
static DOCUMENT_DISTINCT_KEY: &str = "product_id";
@ -272,35 +239,3 @@ async fn distinct_search_with_pagination_no_ranking() {
snapshot!(response["totalPages"], @"2");
snapshot!(response["totalHits"], @"6");
}
// Checks the `distinct` search parameter on a nested attribute: after indexing
// `NESTED_DOCUMENTS` and declaring `color.main` as filterable, a search with
// `distinct: "color.main"` is expected to return the documents with ids 1, 2 and 3
// on a single page of three hits.
#[actix_rt::test]
async fn distinct_at_search_time() {
let server = Server::new().await;
let index = server.index("tamo");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
// Only the settings task is awaited; presumably the earlier document addition has
// finished by then — TODO confirm tasks are processed in order.
let (task, _) = index.update_settings_filterable_attributes(json!(["color.main"])).await;
let task = index.wait_task(task.uid()).await;
snapshot!(task, name: "succeed");
// Collects the primary key of every hit as a string. Panics with the pretty-printed
// response when `hits` is not an array.
fn get_hits(response: &Value) -> Vec<String> {
let hits_array = response["hits"]
.as_array()
.unwrap_or_else(|| panic!("{}", &serde_json::to_string_pretty(&response).unwrap()));
hits_array
.iter()
.map(|h| h[DOCUMENT_PRIMARY_KEY].as_number().unwrap().to_string())
.collect::<Vec<_>>()
}
let (response, code) =
index.search_post(json!({"page": 1, "hitsPerPage": 3, "distinct": "color.main"})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"3");
snapshot!(format!("{:?}", hits), @r###"["1", "2", "3"]"###);
snapshot!(response["page"], @"1");
snapshot!(response["totalPages"], @"1");
snapshot!(response["totalHits"], @"3");
}

View File

@ -167,74 +167,6 @@ async fn search_bad_hits_per_page() {
"###);
}
// A POST search whose `attributesToRetrieve` is a string instead of an array is expected
// to be rejected with `400 Bad Request` and the `invalid_search_attributes_to_retrieve` code.
#[actix_rt::test]
async fn search_bad_attributes_to_retrieve() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"attributesToRetrieve": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.attributesToRetrieve`: expected an array, but found a string: `\"doggo\"`",
"code": "invalid_search_attributes_to_retrieve",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_retrieve"
}
"###);
// Can't make the `attributes_to_retrieve` fail with a get search since it'll accept anything as an array of strings.
}
// Invalid `retrieveVectors` values are expected to be rejected with `400 Bad Request` and
// the `invalid_search_retrieve_vectors` code, for both the POST and the GET search routes.
#[actix_rt::test]
async fn search_bad_retrieve_vectors() {
let server = Server::new().await;
let index = server.index("test");
// POST: a string instead of a boolean.
let (response, code) = index.search_post(json!({"retrieveVectors": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"doggo\"`",
"code": "invalid_search_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
}
"###);
// POST: an array wrapping a boolean.
let (response, code) = index.search_post(json!({"retrieveVectors": [true]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found an array: `[true]`",
"code": "invalid_search_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
}
"###);
// GET: an empty query-string value.
let (response, code) = index.search_get("retrieveVectors=").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `` as a boolean, expected either `true` or `false`",
"code": "invalid_search_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
}
"###);
// GET: a value that is neither `true` nor `false`.
let (response, code) = index.search_get("retrieveVectors=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `doggo` as a boolean, expected either `true` or `false`",
"code": "invalid_search_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors"
}
"###);
}
#[actix_rt::test]
async fn search_bad_attributes_to_crop() {
let server = Server::new().await;
@ -389,40 +321,6 @@ async fn search_bad_facets() {
// Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings.
}
// A `rankingScoreThreshold` of the wrong type (a string) is expected to be rejected with
// `400 Bad Request` and the `invalid_search_ranking_score_threshold` code.
#[actix_rt::test]
async fn search_bad_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"rankingScoreThreshold": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found a string: `\"doggo\"`",
"code": "invalid_search_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
}
"###);
}
// A numeric `rankingScoreThreshold` outside the accepted range (here `42`) is expected to
// be rejected with `400 Bad Request`; the error message states the value must be a float
// between `0.0` and `1.0`.
#[actix_rt::test]
async fn search_invalid_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_search_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn search_non_filterable_facets() {
let server = Server::new().await;
@ -607,7 +505,7 @@ async fn search_bad_matching_strategy() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`, `frequency`",
"message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`",
"code": "invalid_search_matching_strategy",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@ -629,7 +527,7 @@ async fn search_bad_matching_strategy() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`, `frequency`",
"message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`",
"code": "invalid_search_matching_strategy",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@ -1140,66 +1038,3 @@ async fn search_on_unknown_field_plus_joker() {
)
.await;
}
// Error cases of the `distinct` search parameter. Every search below is expected to fail
// with `400 Bad Request` and the `invalid_search_distinct` code; only the message changes
// as the index settings evolve.
#[actix_rt::test]
async fn distinct_at_search_time() {
let server = Server::new().await;
let index = server.index("tamo");
let (task, _) = index.create(None).await;
let task = index.wait_task(task.uid()).await;
snapshot!(task, name: "task-succeed");
// 1. No filterable attribute is configured yet.
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
// 2. Filterable attributes exist but do not contain the requested one: the message
// lists the available attributes.
let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await;
index.wait_task(task.uid()).await;
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
// 3. Once only `color` is displayed, `machin` is replaced by `<..hidden-attributes>`
// in the message.
let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await;
index.wait_task(task.uid()).await;
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
// 4. `distinct` must be a string: a boolean is rejected as a type error.
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.distinct`: expected a string, but found a boolean: `true`",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
}

View File

@ -117,69 +117,3 @@ async fn geo_bounding_box_with_string_and_number() {
)
.await;
}
// Regression test for the issue linked in the body: sorts the geo `DOCUMENTS` by distance
// to a `_geoPoint` after declaring `_geo` as both filterable and sortable.
//
// Expected order in the snapshot: document 2 (string `lat`/`lng`, no `_geoDistance`),
// then document 1 (numeric coordinates, with `_geoDistance`), then document 3 (no `_geo`).
#[actix_rt::test]
async fn bug_4640() {
// https://github.com/meilisearch/meilisearch/issues/4640
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.update_settings_filterable_attributes(json!(["_geo"])).await;
// Only the last enqueued task is awaited; presumably the earlier ones have finished by
// then — TODO confirm tasks are processed in order.
let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
index.wait_task(ret.uid()).await;
// Sort the document with the second one first
// NOTE(review): the name of document 3 in the snapshot below looks mis-encoded
// (possibly "Crêpe Truck") — confirm it matches the `DOCUMENTS` fixture.
index
.search(
json!({
"sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
},
"_geoDistance": 9714063
},
{
"id": 3,
"name": "CrĂŞpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"###);
},
)
.await;
}

View File

@ -5,10 +5,7 @@ use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;
async fn index_with_documents_user_provided<'a>(
server: &'a Server,
documents: &Value,
) -> Index<'a> {
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
@ -18,7 +15,8 @@ async fn index_with_documents_user_provided<'a>(
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -36,38 +34,7 @@ async fn index_with_documents_user_provided<'a>(
index
}
// Test helper: prepares the `test` index of `server` for hybrid search with a
// `huggingFace` embedder.
//
// It enables the `vectorStore` feature, registers an embedder named `default` whose
// document template combines `title` and `desc`, adds a clone of `documents`, waits for
// both tasks, and returns the index handle.
async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({ "embedders": {"default": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.title}}, {{doc.desc}}"
}}} ))
.await;
// Both requests must be accepted (202) before the corresponding task is awaited.
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
index
}
static SIMPLE_SEARCH_DOCUMENTS_VEC: Lazy<Value> = Lazy::new(|| {
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
@ -89,7 +56,7 @@ static SIMPLE_SEARCH_DOCUMENTS_VEC: Lazy<Value> = Lazy::new(|| {
}])
});
static SINGLE_DOCUMENT_VEC: Lazy<Value> = Lazy::new(|| {
static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
json!([{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
@ -98,116 +65,48 @@ static SINGLE_DOCUMENT_VEC: Lazy<Value> = Lazy::new(|| {
}])
});
// Three movie documents (`title`, `desc`, string `id`) that carry no `_vectors` field.
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
},
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
}])
});
#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}}}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]}}]"###);
snapshot!(response["semanticHitCount"], @"0");
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"2");
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"3");
}
#[actix_rt::test]
async fn simple_search_hf() {
let server = Server::new().await;
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) =
index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
snapshot!(response["semanticHitCount"], @"0");
let (response, code) = index
.search_post(
// disable ranking score as the vectors between architectures are not equal
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"1");
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###);
snapshot!(response["semanticHitCount"], @"3");
let (response, code) = index
.search_post(
json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}]"###);
snapshot!(response["semanticHitCount"], @"3");
let (response, code) = index
.search_post(
json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"3");
}
#[actix_rt::test]
async fn distribution_shift() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true});
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}});
let (response, code) = index.search_post(search.clone()).await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
let (response, code) = index
.update_settings(json!({
@ -228,34 +127,31 @@ async fn distribution_shift() {
let (response, code) = index.search_post(search).await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.19161224365234375},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.1920928955078125e-7},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.1920928955078125e-7}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.19161224365234375},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.1920928955078125e-7},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.1920928955078125e-7}]"###);
}
#[actix_rt::test]
async fn highlighter() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.2},
"retrieveVectors": true,
"attributesToHighlight": [
"desc",
"_vectors",
"attributesToHighlight": [
"desc"
],
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**",
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**"
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"}}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
snapshot!(response["semanticHitCount"], @"0");
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.8},
"retrieveVectors": true,
"showRankingScore": true,
"attributesToHighlight": [
"desc"
@ -265,14 +161,13 @@ async fn highlighter() {
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"3");
// no highlighting on full semantic
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 1.0},
"retrieveVectors": true,
"showRankingScore": true,
"attributesToHighlight": [
"desc"
@ -282,14 +177,14 @@ async fn highlighter() {
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"3");
}
#[actix_rt::test]
async fn invalid_semantic_ratio() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) = index
.search_post(
@ -361,45 +256,45 @@ async fn invalid_semantic_ratio() {
#[actix_rt::test]
async fn single_document() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SINGLE_DOCUMENT_VEC).await;
let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;
let (response, code) = index
.search_post(
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0}"###);
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0}"###);
snapshot!(response["semanticHitCount"], @"1");
}
#[actix_rt::test]
async fn query_combination() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
// search without query and vector, but with hybrid => still placeholder
let (response, code) = index
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###);
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###);
snapshot!(response["semanticHitCount"], @"null");
// same with a different semantic ratio
let (response, code) = index
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###);
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###);
snapshot!(response["semanticHitCount"], @"null");
// wrong vector dimensions
let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
.await;
snapshot!(code, @"400 Bad Request");
@ -414,34 +309,34 @@ async fn query_combination() {
// full vector
let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.7773500680923462},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.7236068248748779},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.6581138968467712}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.7773500680923462},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.7236068248748779},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.6581138968467712}]"###);
snapshot!(response["semanticHitCount"], @"3");
// full keyword, without a query
let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###);
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###);
snapshot!(response["semanticHitCount"], @"null");
// query + vector, full keyword => keyword
let (response, code) = index
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.8848484848484849}]"###);
snapshot!(response["semanticHitCount"], @"null");
// query + vector, no hybrid keyword =>
let (response, code) = index
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true}))
.await;
snapshot!(code, @"400 Bad Request");
@ -457,7 +352,7 @@ async fn query_combination() {
// full vector, without a vector => error
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
)
.await;
@ -474,93 +369,11 @@ async fn query_combination() {
// hybrid without a vector => full keyword
let (response, code) = index
.search_post(
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848}]"###);
snapshot!(response["semanticHitCount"], @"0");
}
/// Checks that `retrieveVectors: true` returns the `_vectors` field in search hits, and that
/// the field disappears once `_vectors` is no longer part of `displayedAttributes`.
#[actix_rt::test]
async fn retrieve_vectors() {
let server = Server::new().await;
// NOTE(review): `index_with_documents_hf` is defined outside this view; presumably it
// configures an auto-embedding (Hugging Face) embedder — confirm against the helper.
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
// First search: vectors are requested and `_vectors` is still displayed.
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
// The embeddings themselves are redacted to "[vectors]" so the snapshot only pins
// the shape of `_vectors.default` and its `regenerate` flag.
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
[
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
}
]
"###);
// remove `_vectors` from displayed attributes
let (response, code) =
index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
// Second search: same request, but the hits are now expected without any `_vectors` key.
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
[
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2"
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3"
},
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1"
}
]
"###);
}

View File

@ -1,128 +0,0 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;
/// Returns a handle on the `test` index of `server` after enqueuing `documents`
/// for addition and waiting on the resulting task.
///
/// The task to wait for is taken from the response of `add_documents` instead of
/// being assumed to be task `0`, so the helper keeps working if another task is
/// enqueued on the server before it is called.
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
    let index = server.index("test");
    // The status code is not inspected here; `uid()` reads the task id from the response body.
    let (task, _code) = index.add_documents(documents.clone(), None).await;
    index.wait_task(task.uid()).await;
    index
}
/// Dataset shared by the matching-strategy tests: seven documents with only an `id` and a
/// `title`. The titles mix the words "Captain", "Marvel", "Shazam" and "Planet", and
/// document 7 spells the first word "Capitain".
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"id": "1",
},
{
"title": "Captain Planet",
"id": "2",
},
{
"title": "Captain Marvel",
"id": "3",
},
{
"title": "a Captain Marvel ersatz",
"id": "4"
},
{
"title": "He's not part of the Marvel Cinematic Universe",
"id": "5"
},
{
"title": "a Shazam ersatz, but better than Captain Planet",
"id": "6"
},
{
"title": "Capitain CAAAAAVEEERNE!!!!",
"id": "7"
}
])
});
/// Runs the query "Captain Marvel" under each matching strategy (`last`, `all`,
/// `frequency`) and pins the ordered list of returned document ids.
#[actix_rt::test]
async fn simple_search() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    // Strategy `last`.
    let last_query = json!({"q": "Captain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]});
    index
        .search(last_query, |body, status| {
            snapshot!(status, @"200 OK");
            snapshot!(body["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"2"},{"id":"6"},{"id":"7"}]"###);
        })
        .await;

    // Strategy `all`: only documents 3 and 4, whose titles contain both query words, are expected.
    let all_query = json!({"q": "Captain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]});
    index
        .search(all_query, |body, status| {
            snapshot!(status, @"200 OK");
            snapshot!(body["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
        })
        .await;

    // Strategy `frequency`.
    let frequency_query = json!({"q": "Captain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]});
    index
        .search(frequency_query, |body, status| {
            snapshot!(status, @"200 OK");
            snapshot!(body["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
        })
        .await;
}
/// Same scenario as the plain "Captain Marvel" search, but the query spells the first
/// word "Capitain"; pins the ordered document ids for each matching strategy.
#[actix_rt::test]
async fn search_with_typo() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    // Strategy `last`.
    let last_query = json!({"q": "Capitain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]});
    index
        .search(last_query, |body, status| {
            snapshot!(status, @"200 OK");
            snapshot!(body["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"7"},{"id":"2"},{"id":"6"}]"###);
        })
        .await;

    // Strategy `all`.
    let all_query = json!({"q": "Capitain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]});
    index
        .search(all_query, |body, status| {
            snapshot!(status, @"200 OK");
            snapshot!(body["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
        })
        .await;

    // Strategy `frequency`.
    let frequency_query = json!({"q": "Capitain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]});
    index
        .search(frequency_query, |body, status| {
            snapshot!(status, @"200 OK");
            snapshot!(body["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
        })
        .await;
}
/// Searches "Captain Supercopter Marvel" — "Supercopter" appears in none of the dataset
/// titles — and pins the ordered document ids for each matching strategy.
#[actix_rt::test]
async fn search_with_unknown_word() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    // Strategy `last`.
    let last_query = json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]});
    index
        .search(last_query, |body, status| {
            snapshot!(status, @"200 OK");
            snapshot!(body["hits"], @r###"[{"id":"2"},{"id":"3"},{"id":"4"},{"id":"6"},{"id":"7"}]"###);
        })
        .await;

    // Strategy `all`: no hit is expected since one query word matches nothing.
    let all_query = json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]});
    index
        .search(all_query, |body, status| {
            snapshot!(status, @"200 OK");
            snapshot!(body["hits"], @"[]");
        })
        .await;

    // Strategy `frequency`.
    let frequency_query = json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]});
    index
        .search(frequency_query, |body, status| {
            snapshot!(status, @"200 OK");
            snapshot!(body["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
        })
        .await;
}

View File

@ -7,7 +7,6 @@ mod facet_search;
mod formatted;
mod geo;
mod hybrid;
mod matching_strategy;
mod multi;
mod pagination;
mod restrict_searchable;
@ -48,31 +47,6 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
])
});
/// Dataset used by the ranking-score tests: five documents (ids "A" to "E") whose titles are
/// progressively shorter variations around "Batman", plus the near-miss "Badman".
static SCORE_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
},
{
"title": "Batman Returns",
"id": "C",
},
{
"title": "Batman",
"id": "D",
},
{
"title": "Badman",
"id": "E",
}
])
});
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
@ -301,7 +275,7 @@ async fn negative_special_cases_search() {
index.add_documents(documents, None).await;
index.wait_task(0).await;
index.update_settings(json!({"synonyms": { "escape": ["gläss"] }})).await;
index.update_settings(json!({"synonyms": { "escape": ["glass"] }})).await;
index.wait_task(1).await;
// There is a synonym for escape -> glass but we don't want "escape", only the derivates: glass
@ -706,26 +680,6 @@ async fn search_facet_distribution() {
},
)
.await;
index.update_settings(json!({"filterableAttributes": ["doggos.name"]})).await;
index.wait_task(5).await;
index
.search(
json!({
"facets": ["doggos.name"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let dist = response["facetDistribution"].as_object().unwrap();
assert_eq!(dist.len(), 1);
assert_eq!(
dist["doggos.name"],
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
);
},
)
.await;
}
#[actix_rt::test]
@ -941,9 +895,9 @@ async fn test_score_details() {
"id": "166428",
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
},
"_rankingScoreDetails": {
@ -967,7 +921,7 @@ async fn test_score_details() {
"order": 3,
"attributeRankingOrderScore": 1.0,
"queryWordDistanceScore": 0.8095238095238095,
"score": 0.8095238095238095
"score": 0.9727891156462584
},
"exactness": {
"order": 4,
@ -985,213 +939,6 @@ async fn test_score_details() {
.await;
}
/// Indexes `SCORE_DOCUMENTS`, searches "Badman the dark knight returns 1" with
/// `showRankingScore` enabled and pins the order and `_rankingScore` of every hit.
#[actix_rt::test]
async fn test_score() {
let server = Server::new().await;
let index = server.index("test");
let documents = SCORE_DOCUMENTS.clone();
// Wait on the task id returned by the enqueue call before searching.
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index
.search(
json!({
"q": "Badman the dark knight returns 1",
"showRankingScore": true,
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.9746605609456898
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.8055252965383685
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.16666666666666666
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.07702020202020202
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.07702020202020202
}
]
"###);
},
)
.await;
}
/// Runs the same query against `SCORE_DOCUMENTS` with increasing `rankingScoreThreshold`
/// values (0.0, 0.2, 0.5, 0.8, 1.0) and pins, for each, `estimatedTotalHits` and the
/// remaining hits with their `_rankingScore`.
#[actix_rt::test]
async fn test_score_threshold() {
let query = "Badman dark returns 1";
let server = Server::new().await;
let index = server.index("test");
let documents = SCORE_DOCUMENTS.clone();
// Wait on the task id returned by the enqueue call before searching.
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
// Threshold 0.0: all five documents are expected.
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"5");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.11553030303030302
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.11553030303030302
}
]
"###);
},
)
.await;
// Threshold 0.2: the two documents scoring ~0.1155 are expected to be dropped.
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.2
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"3"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
}
]
"###);
},
)
.await;
// Threshold 0.5: only documents A and B are expected.
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.5
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"2"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
}
]
"###);
},
)
.await;
// Threshold 0.8: only document A is expected.
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.8
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"1"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
}
]
"###);
},
)
.await;
// Threshold 1.0: no document reaches a perfect score, so no hit is expected.
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 1.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"0"###);
// nobody is perfect
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
},
)
.await;
}
#[actix_rt::test]
async fn test_degraded_score_details() {
let server = Server::new().await;
@ -1290,38 +1037,21 @@ async fn experimental_feature_vector_store() {
index.add_documents(json!(documents), None).await;
index.wait_task(0).await;
index
.search(json!({
let (response, code) = index
.search_post(json!({
"vector": [1.0, 2.0, 3.0],
"showRankingScore": true
}), |response, code|{
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
.await;
index
.search(json!({
"retrieveVectors": true,
"showRankingScore": true
}), |response, code|{
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
}))
.await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Passing `vector` as a query parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
meili_snap::snapshot!(code, @"200 OK");
@ -1354,7 +1084,6 @@ async fn experimental_feature_vector_store() {
.search_post(json!({
"vector": [1.0, 2.0, 3.0],
"showRankingScore": true,
"retrieveVectors": true,
}))
.await;
@ -1366,16 +1095,11 @@ async fn experimental_feature_vector_store() {
"title": "Shazam!",
"id": "287947",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
2.0,
3.0
]
],
"regenerate": false
}
"manual": [
1,
2,
3
]
},
"_rankingScore": 1.0
},
@ -1383,16 +1107,11 @@ async fn experimental_feature_vector_store() {
"title": "Captain Marvel",
"id": "299537",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
2.0,
54.0
]
],
"regenerate": false
}
"manual": [
1,
2,
54
]
},
"_rankingScore": 0.9129111766815186
},
@ -1400,16 +1119,11 @@ async fn experimental_feature_vector_store() {
"title": "Gläss",
"id": "450465",
"_vectors": {
"manual": {
"embeddings": [
[
-100.0,
340.0,
90.0
]
],
"regenerate": false
}
"manual": [
-100,
340,
90
]
},
"_rankingScore": 0.8106412887573242
},
@ -1417,16 +1131,11 @@ async fn experimental_feature_vector_store() {
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"_vectors": {
"manual": {
"embeddings": [
[
-100.0,
231.0,
32.0
]
],
"regenerate": false
}
"manual": [
-100,
231,
32
]
},
"_rankingScore": 0.7412010431289673
},
@ -1434,16 +1143,11 @@ async fn experimental_feature_vector_store() {
"title": "Escape Room",
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
10.0,
-23.0,
32.0
]
],
"regenerate": false
}
"manual": [
10,
-23,
32
]
},
"_rankingScore": 0.6972063183784485
}
@ -1701,9 +1405,9 @@ async fn simple_search_with_strange_synonyms() {
"id": "166428",
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
}
}
@ -1722,9 +1426,9 @@ async fn simple_search_with_strange_synonyms() {
"id": "166428",
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
}
}
@ -1743,9 +1447,9 @@ async fn simple_search_with_strange_synonyms() {
"id": "166428",
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
}
}

View File

@ -75,9 +75,9 @@ async fn simple_search_single_index() {
"id": "450465",
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
}
}
@ -96,9 +96,9 @@ async fn simple_search_single_index() {
"id": "299537",
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
}
}
@ -194,9 +194,9 @@ async fn simple_search_two_indexes() {
"id": "450465",
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
}
}
@ -227,9 +227,9 @@ async fn simple_search_two_indexes() {
"cattos": "pésti",
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
}
},
@ -249,9 +249,9 @@ async fn simple_search_two_indexes() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
}
}

View File

@ -285,10 +285,10 @@ async fn attributes_ranking_rule_order() {
@r###"
[
{
"id": "1"
"id": "2"
},
{
"id": "2"
"id": "1"
}
]
"###

View File

@ -1,20 +0,0 @@
---
source: meilisearch/tests/search/distinct.rs
---
{
"uid": 1,
"indexUid": "tamo",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"filterableAttributes": [
"color.main"
]
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@ -1,18 +0,0 @@
---
source: meilisearch/tests/search/errors.rs
---
{
"uid": 0,
"indexUid": "tamo",
"status": "succeeded",
"type": "indexCreation",
"canceledBy": null,
"details": {
"primaryKey": null
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@ -98,7 +98,8 @@ async fn secrets_are_hidden_in_settings() {
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
"logsRoute": false,
"exportPuffinReports": false
}
"###);

View File

@ -1,809 +0,0 @@
use meili_snap::*;
use super::DOCUMENTS;
use crate::common::Server;
use crate::json;
/// A similar query against an index that was never created must be answered
/// with `404` and the `index_not_found` error payload.
#[actix_rt::test]
async fn similar_unexisting_index() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let expected_error = json!({
        "message": "Index `test` not found.",
        "code": "index_not_found",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#index_not_found"
    });

    let request = json!({"id": 287947});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(http_status, 404);
            assert_eq!(payload, expected_error);
        })
        .await;
}
/// An unknown field (`marin`) in the similar request must be rejected with
/// `400` and the `bad_request` error code.
#[actix_rt::test]
async fn similar_unexisting_parameter() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    let request = json!({"id": 287947, "marin": "hello"});
    index
        .similar(request, |payload, http_status| {
            // The payload is printed on failure to ease debugging.
            assert_eq!(http_status, 400, "{}", payload);
            assert_eq!(payload["code"], "bad_request");
        })
        .await;
}
#[actix_rt::test]
async fn similar_feature_not_enabled() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.similar_post(json!({"id": 287947})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using the similar API requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}
#[actix_rt::test]
async fn similar_bad_id() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
"code": "invalid_similar_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
}
"###);
}
#[actix_rt::test]
async fn similar_bad_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found an array: `[\"doggo\"]`",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn similar_invalid_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn similar_invalid_id() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": "http://invalid-docid/"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
"code": "invalid_similar_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
}
"###);
}
#[actix_rt::test]
async fn similar_not_found_id() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Document `definitely-doesnt-exist` not found.",
"code": "not_found_similar_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#not_found_similar_id"
}
"###);
}
#[actix_rt::test]
async fn similar_bad_offset() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_similar_offset",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_offset"
}
"###);
let (response, code) = index.similar_get("id=287947&offset=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `offset`: could not parse `doggo` as a positive integer",
"code": "invalid_similar_offset",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_offset"
}
"###);
}
#[actix_rt::test]
async fn similar_bad_limit() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_similar_limit",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_limit"
}
"###);
let (response, code) = index.similar_get("id=287946&limit=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `limit`: could not parse `doggo` as a positive integer",
"code": "invalid_similar_limit",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_limit"
}
"###);
}
/// A `filter` of an unsupported JSON type (`true`) is rejected with `400` /
/// `invalid_similar_filter`.
#[actix_rt::test]
async fn similar_bad_filter() {
    // Since a filter is deserialized as a json Value it will never fail to deserialize.
    // Thus the error message is not generated by deserr but written by us.
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "manual": {
                    "source": "userProvided",
                    "dimensions": 3,
                }
            },
            "filterableAttributes": ["title"]
        }))
        .await;
    // The status is asserted once; the original repeated this same assertion
    // after `wait_task` although `code` had not changed in between.
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

    let documents = DOCUMENTS.clone();
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await;

    let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
{
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
    // Can't make the `filter` fail with a GET request since it accepts anything as a string.
}
/// A syntactically invalid filter (`title & Glass`, passed as a plain string)
/// is rejected with `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_invalid_syntax_object() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": "title & Glass"});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
/// A syntactically invalid filter wrapped in an array (`["title & Glass"]`)
/// is rejected with `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_invalid_syntax_array() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": ["title & Glass"]});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
/// A filter with trailing garbage (`... XOR title = Glass`) is rejected with
/// `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_invalid_syntax_string() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": "title = Glass XOR title = Glass"});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
/// Filtering on `many`, which is not a filterable attribute, with the filter
/// wrapped in an array is rejected with `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_invalid_attribute_array() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder; only `title` is filterable.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": ["many = Glass"]});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
/// Filtering on `many`, which is not a filterable attribute, with the filter
/// given as a string is rejected with `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_invalid_attribute_string() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder; only `title` is filterable.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": "many = Glass"});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
/// Using the reserved keyword `_geo` as a filter attribute (array form) is
/// rejected with `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_reserved_geo_attribute_array() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": ["_geo = Glass"]});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
/// Using the reserved keyword `_geo` as a filter attribute (string form) is
/// rejected with `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_reserved_geo_attribute_string() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": "_geo = Glass"});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
/// Using the reserved keyword `_geoDistance` as a filter attribute (array
/// form) is rejected with `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_reserved_attribute_array() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": ["_geoDistance = Glass"]});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
/// Using the reserved keyword `_geoDistance` as a filter attribute (string
/// form) is rejected with `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_reserved_attribute_string() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": "_geoDistance = Glass"});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
/// Using the reserved keyword `_geoPoint` as a filter attribute (array form)
/// is rejected with `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_reserved_geo_point_array() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": ["_geoPoint = Glass"]});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
/// Using the reserved keyword `_geoPoint` as a filter attribute (string form)
/// is rejected with `400` / `invalid_similar_filter`.
#[actix_rt::test]
async fn filter_reserved_geo_point_string() {
    let server = Server::new().await;
    let index = server.index("test");
    server.set_features(json!({"vectorStore": true})).await;

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    let expected_error = json!({
        "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });

    let request = json!({"id": 287947, "filter": "_geoPoint = Glass"});
    index
        .similar(request, |payload, http_status| {
            assert_eq!(payload, expected_error);
            assert_eq!(http_status, 400);
        })
        .await;
}
#[actix_rt::test]
async fn similar_bad_retrieve_vectors() {
let server = Server::new().await;
server.set_features(json!({"vectorStore": true})).await;
let index = server.index("test");
let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"doggo\"`",
"code": "invalid_similar_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
}
"###);
let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found an array: `[true]`",
"code": "invalid_similar_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
}
"###);
let (response, code) = index.similar_get("retrieveVectors=").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `` as a boolean, expected either `true` or `false`",
"code": "invalid_similar_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
}
"###);
let (response, code) = index.similar_get("retrieveVectors=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `doggo` as a boolean, expected either `true` or `false`",
"code": "invalid_similar_retrieve_vectors",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors"
}
"###);
}

View File

@ -1,731 +0,0 @@
mod errors;
use meili_snap::{json_string, snapshot};
use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
/// Lazily built fixture: five movie documents, each carrying a 3-component
/// `manual` vector under `_vectors`.
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "title": "Shazam!",
            "release_year": 2019,
            "id": "287947",
            // The three vector components stand for semantic properties:
            // 1. magic: anything that reminds you of magic
            // 2. authority: anything that inspires command
            // 3. horror: anything that inspires fear or dread
            "_vectors": { "manual": [0.8, 0.4, -0.5] },
        },
        {
            "title": "Captain Marvel",
            "release_year": 2019,
            "id": "299537",
            "_vectors": { "manual": [0.6, 0.8, -0.2] },
        },
        {
            "title": "Escape Room",
            "release_year": 2019,
            "id": "522681",
            "_vectors": { "manual": [0.1, 0.6, 0.8] },
        },
        {
            "title": "How to Train Your Dragon: The Hidden World",
            "release_year": 2019,
            "id": "166428",
            "_vectors": { "manual": [0.7, 0.7, -0.4] },
        },
        {
            "title": "All Quiet on the Western Front",
            "release_year": 1930,
            "id": "143",
            "_vectors": { "manual": [-0.5, 0.3, 0.85] },
        }
    ])
});
/// Happy path of the similar route: after indexing the fixture documents,
/// asking for neighbours of document `143` and then of document `"299537"`
/// returns the other four documents in the order pinned by the snapshots,
/// with their vectors included.
#[actix_rt::test]
async fn basic() {
    let server = Server::new().await;
    let index = server.index("test");

    let (features, status) = server.set_features(json!({"vectorStore": true})).await;
    snapshot!(status, @"200 OK");
    snapshot!(features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    // Neighbours of document 143 (id given as an integer).
    let by_integer_id = json!({"id": 143, "retrieveVectors": true});
    index
        .similar(by_integer_id, |payload, http_status| {
            snapshot!(http_status, @"200 OK");
            snapshot!(json_string!(payload["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
}
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": {
"embeddings": [
[
0.6000000238418579,
0.800000011920929,
-0.20000000298023224
]
],
"regenerate": false
}
}
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": {
"embeddings": [
[
0.699999988079071,
0.699999988079071,
-0.4000000059604645
]
],
"regenerate": false
}
}
},
{
"title": "Shazam!",
"release_year": 2019,
"id": "287947",
"_vectors": {
"manual": {
"embeddings": [
[
0.800000011920929,
0.4000000059604645,
-0.5
]
],
"regenerate": false
}
}
}
]
"###);
        })
        .await;

    // Neighbours of document 299537 (id given as a string).
    let by_string_id = json!({"id": "299537", "retrieveVectors": true});
    index
        .similar(by_string_id, |payload, http_status| {
            snapshot!(http_status, @"200 OK");
            snapshot!(json_string!(payload["hits"]), @r###"
[
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": {
"embeddings": [
[
0.699999988079071,
0.699999988079071,
-0.4000000059604645
]
],
"regenerate": false
}
}
},
{
"title": "Shazam!",
"release_year": 2019,
"id": "287947",
"_vectors": {
"manual": {
"embeddings": [
[
0.800000011920929,
0.4000000059604645,
-0.5
]
],
"regenerate": false
}
}
},
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
}
},
{
"title": "All Quiet on the Western Front",
"release_year": 1930,
"id": "143",
"_vectors": {
"manual": {
"embeddings": [
[
-0.5,
0.30000001192092896,
0.8500000238418579
]
],
"regenerate": false
}
}
}
]
"###);
        })
        .await;
}
/// Raising `rankingScoreThreshold` step by step (0, 0.2, 0.3, 0.6, 0.9) for
/// neighbours of document `143` shrinks the hit list from four documents down
/// to none, as pinned by the snapshots.
#[actix_rt::test]
async fn ranking_score_threshold() {
    let server = Server::new().await;
    let index = server.index("test");

    let (features, status) = server.set_features(json!({"vectorStore": true})).await;
    snapshot!(status, @"200 OK");
    snapshot!(features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);

    // Declare a 3-dimensional user-provided embedder and a filterable `title`.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    // Threshold 0: all four neighbours are returned.
    let request = json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true});
    index
        .similar(request, |payload, http_status| {
            snapshot!(http_status, @"200 OK");
            snapshot!(json_string!(payload["estimatedTotalHits"]), @"4");
            snapshot!(json_string!(payload["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": {
"embeddings": [
[
0.6000000238418579,
0.800000011920929,
-0.20000000298023224
]
],
"regenerate": false
}
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": {
"embeddings": [
[
0.699999988079071,
0.699999988079071,
-0.4000000059604645
]
],
"regenerate": false
}
},
"_rankingScore": 0.2819308042526245
},
{
"title": "Shazam!",
"release_year": 2019,
"id": "287947",
"_vectors": {
"manual": {
"embeddings": [
[
0.800000011920929,
0.4000000059604645,
-0.5
]
],
"regenerate": false
}
},
"_rankingScore": 0.1662663221359253
}
]
"###);
        })
        .await;

    // Threshold 0.2: the lowest-scored hit is dropped.
    let request = json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true});
    index
        .similar(request, |payload, http_status| {
            snapshot!(http_status, @"200 OK");
            snapshot!(json_string!(payload["estimatedTotalHits"]), @"3");
            snapshot!(json_string!(payload["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": {
"embeddings": [
[
0.6000000238418579,
0.800000011920929,
-0.20000000298023224
]
],
"regenerate": false
}
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": {
"embeddings": [
[
0.699999988079071,
0.699999988079071,
-0.4000000059604645
]
],
"regenerate": false
}
},
"_rankingScore": 0.2819308042526245
}
]
"###);
        })
        .await;

    // Threshold 0.3: two hits remain.
    let request = json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true});
    index
        .similar(request, |payload, http_status| {
            snapshot!(http_status, @"200 OK");
            snapshot!(json_string!(payload["estimatedTotalHits"]), @"2");
            snapshot!(json_string!(payload["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": {
"embeddings": [
[
0.6000000238418579,
0.800000011920929,
-0.20000000298023224
]
],
"regenerate": false
}
},
"_rankingScore": 0.39060014486312866
}
]
"###);
        })
        .await;

    // Threshold 0.6: only the best hit remains.
    let request = json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true});
    index
        .similar(request, |payload, http_status| {
            snapshot!(http_status, @"200 OK");
            snapshot!(json_string!(payload["estimatedTotalHits"]), @"1");
            snapshot!(json_string!(payload["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
},
"_rankingScore": 0.890957772731781
}
]
"###);
        })
        .await;

    // Threshold 0.9: above every score, so the hit list is empty.
    let request = json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true});
    index
        .similar(request, |payload, http_status| {
            snapshot!(http_status, @"200 OK");
            snapshot!(json_string!(payload["hits"]), @"[]");
        })
        .await;
}
/// Similar queries honour the `filter` parameter: neighbours of document
/// `522681` are restricted first to `release_year = 2019`, then to
/// `release_year < 2000`, with the resulting hits pinned by the snapshots.
#[actix_rt::test]
async fn filter() {
    let server = Server::new().await;
    let index = server.index("test");

    let (features, status) = server.set_features(json!({"vectorStore": true})).await;
    snapshot!(status, @"200 OK");
    snapshot!(features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);

    // Declare a 3-dimensional user-provided embedder; both `title` and
    // `release_year` are filterable here.
    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
        "filterableAttributes": ["title", "release_year"]
    });
    let (task, status) = index.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await;

    // Index the shared fixture documents.
    let (task, status) = index.add_documents(DOCUMENTS.clone(), None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(task.uid()).await;

    // Only the documents released in 2019.
    let released_in_2019 =
        json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true});
    index
        .similar(released_in_2019, |payload, http_status| {
            snapshot!(http_status, @"200 OK");
            snapshot!(json_string!(payload["hits"]), @r###"
[
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": {
"embeddings": [
[
0.6000000238418579,
0.800000011920929,
-0.20000000298023224
]
],
"regenerate": false
}
}
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": {
"embeddings": [
[
0.699999988079071,
0.699999988079071,
-0.4000000059604645
]
],
"regenerate": false
}
}
},
{
"title": "Shazam!",
"release_year": 2019,
"id": "287947",
"_vectors": {
"manual": {
"embeddings": [
[
0.800000011920929,
0.4000000059604645,
-0.5
]
],
"regenerate": false
}
}
}
]
"###);
        })
        .await;

    // Only the documents released before 2000.
    let released_before_2000 =
        json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true});
    index
        .similar(released_before_2000, |payload, http_status| {
            snapshot!(http_status, @"200 OK");
            snapshot!(json_string!(payload["hits"]), @r###"
[
{
"title": "All Quiet on the Western Front",
"release_year": 1930,
"id": "143",
"_vectors": {
"manual": {
"embeddings": [
[
-0.5,
0.30000001192092896,
0.8500000238418579
]
],
"regenerate": false
}
}
}
]
"###);
        })
        .await;
}
/// Checks that `limit` and `offset` are honoured by the similar route.
#[actix_rt::test]
async fn limit_and_offset() {
let server = Server::new().await;
let index = server.index("test");
// Enable the `vectorStore` feature and pin the feature set reported back.
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
// Register a 3-dimensional user-provided embedder named `manual`.
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
// Index the shared `DOCUMENTS` fixture and wait for the task before querying.
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
// `limit: 1` with no offset: the snapshot expects exactly one hit.
index
.similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": {
"embeddings": [
[
0.10000000149011612,
0.6000000238418579,
0.800000011920929
]
],
"regenerate": false
}
}
}
]
"###);
})
.await;
// `limit: 1, offset: 1`: the snapshot expects one hit again, but a different
// document from the one returned without an offset.
index
.similar(
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": {
"embeddings": [
[
0.6000000238418579,
0.800000011920929,
-0.20000000298023224
]
],
"regenerate": false
}
}
}
]
"###);
},
)
.await;
}

View File

@ -1,5 +1,6 @@
use std::time::Duration;
use actix_rt::time::sleep;
use meili_snap::{json_string, snapshot};
use meilisearch::option::ScheduleSnapshot;
use meilisearch::Opt;
@ -31,7 +32,6 @@ macro_rules! verify_snapshot {
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn perform_snapshot() {
let temp = tempfile::tempdir().unwrap();
let snapshot_dir = tempfile::tempdir().unwrap();
@ -53,29 +53,11 @@ async fn perform_snapshot() {
index.load_test_set().await;
let (task, code) = server.index("test1").create(Some("prim")).await;
meili_snap::snapshot!(code, @"202 Accepted");
server.index("test1").create(Some("prim")).await;
index.wait_task(task.uid()).await;
index.wait_task(2).await;
// wait for the _next task_ to process, aka the snapshot that should be enqueued at some point
println!("waited for the next task to finish");
let now = std::time::Instant::now();
let next_task = task.uid() + 1;
loop {
let (value, code) = index.get_task(next_task).await;
dbg!(&value);
if code != 404 && value["status"].as_str() == Some("succeeded") {
break;
}
if now.elapsed() > Duration::from_secs(30) {
panic!("The snapshot didn't schedule in 30s even though it was supposed to be scheduled every 2s: {}",
serde_json::to_string_pretty(&value).unwrap()
);
}
}
sleep(Duration::from_secs(2)).await;
let temp = tempfile::tempdir().unwrap();

View File

@ -1,589 +0,0 @@
mod rest;
mod settings;
use meili_snap::{json_string, snapshot};
use crate::common::index::Index;
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
/// Walks a user-provided embedder through adding vectors, replacing / nulling
/// them, and deleting a document, snapshotting the stored documents each time.
#[actix_rt::test]
async fn add_remove_user_provided() {
let server = Server::new().await;
let index = server.index("doggo");
// Enable the `vectorStore` feature and pin the feature set reported back.
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
// Register a 3-dimensional user-provided embedder named `manual`.
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
// Step 1: add two documents, each carrying one vector in the short array form.
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
// Both documents are expected back with their vectors in the expanded
// `{ embeddings, regenerate }` form.
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
1.0,
1.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
// Step 2: replace the vector of document 0 and send `null` for document 1.
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [10, 10, 10] }},
{"id": 1, "name": "echo", "_vectors": { "manual": null }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
// Document 0 is expected with the new vector, document 1 with an empty `_vectors`.
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
10.0,
10.0,
10.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
// Step 3: delete document 0; only document 1 is expected to remain.
let (value, code) = index.delete_document(0).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 1,
"name": "echo",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}
/// Prepares the `doggo` index shared by the user-provided-embedder tests.
///
/// Enables the `vectorStore` feature on `server`, registers a 3-dimensional
/// `userProvided` embedder called `manual`, then indexes five documents whose
/// `_vectors.manual` field covers the accepted input shapes: a single vector,
/// a list of vectors, and the explicit `{ regenerate, embeddings }` object
/// with either of those. Every enqueued task is waited on before returning.
async fn generate_default_user_provided_documents(server: &Server) -> Index {
    let doggo = server.index("doggo");

    let (features, status) = server.set_features(json!({"vectorStore": true})).await;
    snapshot!(status, @"200 OK");
    snapshot!(features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);

    let embedder_settings = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
    });
    let (settings_task, status) = doggo.update_settings(embedder_settings).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(settings_task.uid()).await;

    let dogs = json!([
        {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
        {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1] }},
        {"id": 2, "name": "billou", "_vectors": { "manual": [[2, 2, 2], [2, 2, 3]] }},
        {"id": 3, "name": "intel", "_vectors": { "manual": { "regenerate": false, "embeddings": [3, 3, 3] }}},
        {"id": 4, "name": "max", "_vectors": { "manual": { "regenerate": false, "embeddings": [[4, 4, 4], [4, 4, 5]] }}},
    ]);
    let (addition_task, status) = doggo.add_documents(dogs, None).await;
    snapshot!(status, @"202 Accepted");
    doggo.wait_task(addition_task.uid()).await;

    doggo
}
/// Sends a series of malformed (and a couple of valid) `_vectors.manual`
/// payloads for document 0 and snapshots the resulting task for each one.
///
/// The full-task snapshots include the task `uid`, so the order of the
/// requests below must not change.
#[actix_rt::test]
async fn user_provided_embeddings_error() {
let server = Server::new().await;
let index = generate_default_user_provided_documents(&server).await;
// First case, we forget to specify the `regenerate`
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [0, 0, 0] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Second case, we don't specify anything
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": {}}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Third case, we specify something wrong in place of regenerate
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": "yes please" }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 4,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.regenerate`: expected a boolean, but found a string: `\"yes please\"`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Fourth case, `embeddings` is a boolean instead of null or an array
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 5,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings`: expected null or an array, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Fifth case, the first element of `embeddings` is a boolean
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 6,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Sixth case, a nested embedding contains a boolean
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 7,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Seventh case, valid embeddings together with `regenerate: true`: the task must succeed
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [23, 0.1, -12], "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
// Eighth case, `regenerate: false` without any `embeddings`: the task must succeed
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
// Ninth case, `embeddings` starts with a number and then contains an array
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [0.1, [0.2, 0.3]] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 10,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Tenth case, `embeddings` starts with an array and then contains a number
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, 0.2], 0.3] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 11,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Eleventh case, two problems at once (a boolean inside the first embedding and a
// bare number after it): the snapshot expects the boolean at `[0][1]` to be reported
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, true], 0.3] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 12,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
/// After clearing every document of an index that holds user-provided
/// vectors, neither the documents route nor a vector search may return anything.
#[actix_rt::test]
async fn clear_documents() {
    let server = Server::new().await;
    let doggo = generate_default_user_provided_documents(&server).await;

    let (clear_task, _) = doggo.clear_all_documents().await;
    doggo.wait_task(clear_task.uid()).await;

    // The documents DB must be empty, even when vectors are requested.
    let with_vectors = GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() };
    let (listing, _) = doggo.get_all_documents(with_vectors).await;
    snapshot!(json_string!(listing), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);

    // The arroy DB must be empty as well: a vector search finds no hit.
    let vector_query = json!({ "vector": [1, 1, 1] });
    let (search_result, _) = doggo.search_post(vector_query).await;
    snapshot!(search_result, @r###"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0,
"semanticHitCount": 0
}
"###);
}
/// Regression test for issue 4588: add a single document with one
/// user-provided vector, then overwrite it with `manual: null`.
///
/// The three task snapshots are stored as named snapshot files rather than inline.
#[actix_rt::test]
async fn add_remove_one_vector_4588() {
// https://github.com/meilisearch/meilisearch/issues/4588
let server = Server::new().await;
let index = server.index("doggo");
// Enable the `vectorStore` feature and pin the feature set reported back.
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
// Register a 3-dimensional user-provided embedder named `manual`.
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, name: "settings-processed");
// Add the only document, with one vector.
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-added");
// Send the same document again with `manual: null`. The snapshot is named
// "document-deleted" even though the request is a document addition.
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": null }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-deleted");
// A vector search is snapshotted as still returning the document.
let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
snapshot!(documents, @r###"
{
"hits": [
{
"id": 0,
"name": "kefir"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"semanticHitCount": 1
}
"###);
// The stored document is expected to expose an empty `_vectors` object.
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}

View File

@ -1,339 +0,0 @@
use crate::vector::GetAllDocumentsOptions;
use meili_snap::{json_string, snapshot};
use std::sync::atomic::{AtomicUsize, Ordering};
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, Request, ResponseTemplate};
use crate::common::{Server, Value};
use crate::json;
/// Starts a wiremock server acting as a fake REST embedder.
///
/// Every `POST /` is answered with `{"data": [n, n, n]}`, where `n` is the
/// number of requests this particular mock has already served. The counter is
/// owned by the responder closure instead of being a process-wide `static`, so
/// each test observes its own sequence starting at 0 regardless of which other
/// tests are talking to their own mocks at the same time.
///
/// Returns the running mock server together with embedder settings
/// (`source: "rest"`, 3 dimensions) pointing at it.
async fn create_mock() -> (MockServer, Value) {
    let mock_server = MockServer::start().await;
    let counter = AtomicUsize::new(0);
    Mock::given(method("POST"))
        .and(path("/"))
        .respond_with(move |_req: &Request| {
            let cpt = counter.fetch_add(1, Ordering::Relaxed);
            ResponseTemplate::new(200).set_body_json(json!({ "data": vec![cpt; 3] }))
        })
        .mount(&mock_server)
        .await;
    let url = mock_server.uri();
    let embedder_settings = json!({
        "source": "rest",
        "url": url,
        "dimensions": 3,
        "query": {},
    });
    (mock_server, embedder_settings)
}

/// Sanity check of the mock itself: consecutive `POST /` requests must return
/// an incrementing counter repeated three times under the `data` key.
#[actix_rt::test]
async fn dummy_testing_the_mock() {
    let (mock, _setting) = create_mock().await;
    // The mock only matches POST requests; anything else never reaches the responder.
    let client = reqwest::Client::new();
    let body = client.post(mock.uri()).send().await.unwrap().text().await.unwrap();
    snapshot!(body, @r###"{"data":[0,0,0]}"###);
    let body = client.post(mock.uri()).send().await.unwrap().text().await.unwrap();
    snapshot!(body, @r###"{"data":[1,1,1]}"###);
    let body = client.post(mock.uri()).send().await.unwrap().text().await.unwrap();
    snapshot!(body, @r###"{"data":[2,2,2]}"###);
    let body = client.post(mock.uri()).send().await.unwrap().text().await.unwrap();
    snapshot!(body, @r###"{"data":[3,3,3]}"###);
    let body = client.post(mock.uri()).send().await.unwrap().text().await.unwrap();
    snapshot!(body, @r###"{"data":[4,4,4]}"###);
}
/// Spins up a fresh test server with the `vectorStore` feature switched on,
/// asserting the feature set the server reports back.
async fn get_server_vector() -> Server {
    let vector_server = Server::new().await;

    let (features, status) = vector_server.set_features(json!({"vectorStore": true})).await;
    snapshot!(status, @"200 OK");
    snapshot!(features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);

    vector_server
}
/// Feeds progressively less broken REST embedder settings to the settings
/// route and snapshots how each one is rejected.
///
/// The task snapshots include the task `uid` and the counter value returned by
/// the mock, so the order of the requests below must not change.
#[actix_rt::test]
async fn bad_settings() {
let (mock, _setting) = create_mock().await;
let server = get_server_vector().await;
let index = server.index("doggo");
// No `url`: rejected synchronously with a 400, no task is created.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest" }),
},
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.rest`: Missing field `url` (note: this field is mandatory for source rest)",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// A `url` that cannot be parsed: also rejected synchronously with a 400.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": "kefir" }),
},
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.rest.url`: could not parse `kefir`: relative URL without a base",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// Valid `url` but no `query`: the request is accepted and the task fails.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri() }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 0,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]"
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with user error: was expected 'input' to be an object in query 'null'.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// With an empty `query` the mock is reached, but the task fails because the
// response has no `embedding` component.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {} }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 1,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]",
"query": {}
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with error: component `embedding` not found in path `embedding` in response: `{\n \"data\": [\n 0,\n 0,\n 0\n ]\n}`.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Adding `pathToEmbeddings: ["data"]`: the snapshot still reports the
// `embedding` component as missing.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {}, "pathToEmbeddings": ["data"] }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]",
"query": {},
"pathToEmbeddings": [
"data"
]
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with error: component `embedding` not found in path `embedding` in response: `{\n \"data\": [\n 1,\n 1,\n 1\n ]\n}`.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Using `embeddingObject: ["data"]` instead: the task fails with `data`
// reported as not found in the response.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {}, "embeddingObject": ["data"] }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]",
"query": {},
"embeddingObject": [
"data"
]
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with error: component `data` not found in path `data` in response: `{\n \"data\": [\n 2,\n 2,\n 2\n ]\n}`.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Validate an embedder with a bad dimension of 2 instead of 3
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {}, "pathToEmbeddings": [], "embeddingObject": ["data"], "dimensions": 2 }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
// The settings task succeeds; the mismatch only surfaces when a document is indexed.
// NOTE(review): the snapshot below pins a generic "unexpected crash" internal
// error, which looks like a known defect being recorded rather than the
// desired behavior; confirm before relying on it.
let (response, code) = index.add_documents(json!( { "id": 1, "name": "kefir" }), None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 5,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "An unexpected crash occurred when processing the task.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
/// Indexes a mix of documents with and without a user-provided `rest` vector
/// on an index whose `rest` embedder points at the mock server.
#[actix_rt::test]
async fn add_vector_and_user_provided() {
let (_mock, setting) = create_mock().await;
let server = get_server_vector().await;
let index = server.index("doggo");
// Configure the `rest` embedder with the settings returned by `create_mock`.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": setting,
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
// Documents 0 and 2 carry no vector; document 1 provides its own.
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel"},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
// NOTE(review): empty inline snapshot; looks like a placeholder that was never
// filled in with the real task output. Confirm and regenerate.
snapshot!(task, @"");
// NOTE(review): this snapshot expects zero documents although three were
// sent above; presumably it records a failing or unfinished state. Verify.
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
}

View File

@ -1,228 +0,0 @@
use meili_snap::{json_string, snapshot};
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
use crate::vector::generate_default_user_provided_documents;
/// An embedder first declared with an empty configuration can later be
/// updated to a `userProvided` source with explicit dimensions.
#[actix_rt::test]
async fn update_embedder() {
    let server = Server::new().await;
    let doggo = server.index("doggo");

    let (features, status) = server.set_features(json!({"vectorStore": true})).await;
    snapshot!(status, @"200 OK");
    snapshot!(features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);

    // First declare `manual` without any option at all.
    let empty_embedder = json!({
        "embedders": { "manual": {}},
    });
    let (first_update, status) = doggo.update_settings(empty_embedder).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(first_update.uid()).await;

    // Then turn it into a 2-dimensional user-provided embedder.
    let user_provided_embedder = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 2,
            }
        },
    });
    let (second_update, status) = doggo.update_settings(user_provided_embedder).await;
    snapshot!(status, @"202 Accepted");

    let finished_task = server.wait_task(second_update.uid()).await;
    snapshot!(finished_task, @r###"
{
"uid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 2
}
}
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
/// Resets all settings of an index that holds user-provided vectors and
/// snapshots what remains: the documents, their vectors, and the response to
/// a vector search.
#[actix_rt::test]
async fn reset_embedder_documents() {
let server = Server::new().await;
let index = generate_default_user_provided_documents(&server).await;
// Reset every setting of the index, which includes the `manual` embedder.
let (response, code) = index.delete_settings().await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
// Make sure the documents are still present
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions {
limit: None,
offset: None,
retrieve_vectors: false,
fields: None,
})
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir"
},
{
"id": 1,
"name": "echo"
},
{
"id": 2,
"name": "billou"
},
{
"id": 3,
"name": "intel"
},
{
"id": 4,
"name": "max"
}
],
"offset": 0,
"limit": 20,
"total": 5
}
"###);
// Make sure we are still able to retrieve their vectors
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
0.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
1.0,
1.0
]
],
"regenerate": false
}
}
},
{
"id": 2,
"name": "billou",
"_vectors": {
"manual": {
"embeddings": [
[
2.0,
2.0,
2.0
],
[
2.0,
2.0,
3.0
]
],
"regenerate": false
}
}
},
{
"id": 3,
"name": "intel",
"_vectors": {
"manual": {
"embeddings": [
[
3.0,
3.0,
3.0
]
],
"regenerate": false
}
}
},
{
"id": 4,
"name": "max",
"_vectors": {
"manual": {
"embeddings": [
[
4.0,
4.0,
4.0
],
[
4.0,
4.0,
5.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 5
}
"###);
// Make sure the arroy DB has been cleared
// The snapshot does not show an empty hit list: it pins the `invalid_embedder`
// error returned because no embedder named `default` exists after the reset.
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
snapshot!(json_string!(documents), @r###"
{
"message": "Cannot find embedder with name `default`.",
"code": "invalid_embedder",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_embedder"
}
"###);
}

View File

@ -1,19 +0,0 @@
---
source: meilisearch/tests/vector/mod.rs
---
{
"uid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@ -1,19 +0,0 @@
---
source: meilisearch/tests/vector/mod.rs
---
{
"uid": 2,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@ -1,23 +0,0 @@
---
source: meilisearch/tests/vector/mod.rs
---
{
"uid": 0,
"indexUid": "doggo",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3
}
}
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@ -80,7 +80,9 @@ fn main() -> anyhow::Result<()> {
/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) }
let env = EnvOpenOptions::new()
.max_dbs(100)
.open(&path)
.with_context(|| format!("While trying to open {:?}", path.display()))?;
eprintln!("Deleting tasks from the database...");
@ -127,7 +129,7 @@ fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
}
}
eprintln!("Successfully deleted {count} content files from disk!");
eprintln!("Sucessfully deleted {count} content files from disk!");
Ok(())
}
@ -191,7 +193,9 @@ fn export_a_dump(
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
let env = EnvOpenOptions::new()
.max_dbs(100)
.open(&index_scheduler_path)
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Dumping the keys...");

View File

@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.8.11", default-features = false }
charabia = { version = "0.8.10", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11"
deserr = "0.6.1"
@ -30,7 +30,7 @@ grenad = { version = "0.4.6", default-features = false, features = [
"rayon",
"tempfile",
] }
heed = { version = "0.20.1", default-features = false, features = [
heed = { version = "0.20.0-alpha.9", default-features = false, features = [
"serde-json",
"serde-bincode",
"read-txn-no-tls",
@ -44,7 +44,7 @@ once_cell = "1.19.0"
ordered-float = "4.2.0"
rand_pcg = { version = "0.3.1", features = ["serde1"] }
rayon = "1.8.0"
roaring = { version = "0.10.2", features = ["serde"] }
roaring = "0.10.2"
rstar = { version = "0.11.0", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
@ -67,22 +67,25 @@ filter-parser = { path = "../filter-parser" }
# documents words self-join
itertools = "0.11.0"
# profiling
puffin = "0.16.0"
csv = "1.3.0"
candle-core = { version = "0.4.1" }
candle-transformers = { version = "0.4.1" }
candle-nn = { version = "0.4.1" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default_features = false, features = [
"onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
"online",
] }
tiktoken-rs = "0.5.8"
liquid = "0.26.4"
arroy = "0.4.0"
arroy = "0.2.0"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.9.7", features = ["json"] }
ureq = { version = "2.9.6", features = ["json"] }
url = "2.5.0"
[dev-dependencies]

View File

@ -48,8 +48,8 @@ fn main() -> Result<(), Box<dyn Error>> {
let start = Instant::now();
let mut ctx = SearchContext::new(&index, &txn)?;
let universe = filtered_universe(ctx.index, ctx.txn, &None)?;
let mut ctx = SearchContext::new(&index, &txn);
let universe = filtered_universe(&ctx, &None)?;
let docs = execute_search(
&mut ctx,
@ -59,7 +59,6 @@ fn main() -> Result<(), Box<dyn Error>> {
false,
universe,
&None,
&None,
GeoSortStrategy::default(),
0,
20,
@ -67,7 +66,6 @@ fn main() -> Result<(), Box<dyn Error>> {
&mut DefaultSearchLogger,
logger,
TimeBudget::max(),
None,
)?;
if let Some((logger, dir)) = detailed_logger {
logger.finish(&mut ctx, Path::new(dir))?;

View File

@ -1,3 +0,0 @@
target
corpus
artifacts

View File

@ -203,7 +203,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) {
"string" => (field_name, AllowedType::String),
"boolean" => (field_name, AllowedType::Boolean),
"number" => (field_name, AllowedType::Number),
// if the pattern isn't recognized, we keep the whole field.
// if the pattern isn't reconized, we keep the whole field.
_otherwise => (header, AllowedType::String),
},
None => (header, AllowedType::String),

View File

@ -12,10 +12,7 @@ use bimap::BiHashMap;
pub use builder::DocumentsBatchBuilder;
pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
use obkv::KvReader;
pub use primary_key::{
validate_document_id_value, DocumentIdExtractionError, FieldIdMapper, PrimaryKey,
DEFAULT_PRIMARY_KEY,
};
pub use primary_key::{DocumentIdExtractionError, FieldIdMapper, PrimaryKey, DEFAULT_PRIMARY_KEY};
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
use serde::{Deserialize, Serialize};

View File

@ -60,7 +60,7 @@ impl<'a> PrimaryKey<'a> {
Some(document_id_bytes) => {
let document_id = serde_json::from_slice(document_id_bytes)
.map_err(InternalError::SerdeJson)?;
match validate_document_id_value(document_id) {
match validate_document_id_value(document_id)? {
Ok(document_id) => Ok(Ok(document_id)),
Err(user_error) => {
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
@ -88,7 +88,7 @@ impl<'a> PrimaryKey<'a> {
}
match matching_documents_ids.pop() {
Some(document_id) => match validate_document_id_value(document_id) {
Some(document_id) => match validate_document_id_value(document_id)? {
Ok(document_id) => Ok(Ok(document_id)),
Err(user_error) => {
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
@ -159,14 +159,14 @@ fn validate_document_id(document_id: &str) -> Option<&str> {
}
}
pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserError> {
pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
match document_id {
Value::String(string) => match validate_document_id(&string) {
Some(s) if s.len() == string.len() => Ok(string),
Some(s) => Ok(s.to_string()),
None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
Some(s) if s.len() == string.len() => Ok(Ok(string)),
Some(s) => Ok(Ok(s.to_string())),
None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
},
Value::Number(number) if number.is_i64() => Ok(number.to_string()),
content => Err(UserError::InvalidDocumentId { document_id: content }),
Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
}
}

View File

@ -32,8 +32,6 @@ pub enum InternalError {
DatabaseClosing,
#[error("Missing {} in the {db_name} database.", key.unwrap_or("key"))]
DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
#[error("Missing {key} in the fieldids weights mapping.")]
FieldidsWeightsMapMissingEntry { key: FieldId },
#[error(transparent)]
FieldIdMapMissingEntry(#[from] FieldIdMapMissingEntry),
#[error("Missing {key} in the field id mapping.")]
@ -48,6 +46,8 @@ pub enum InternalError {
GrenadInvalidFormatVersion,
#[error("Invalid merge while processing {process}")]
IndexingMergingKeys { process: &'static str },
#[error("{}", HeedError::InvalidDatabaseTyping)]
InvalidDatabaseTyping,
#[error(transparent)]
RayonThreadPool(#[from] ThreadPoolBuildError),
#[error(transparent)]
@ -117,10 +117,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
InvalidGeoField(#[from] GeoError),
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
InvalidVectorDimensions { expected: usize, found: usize },
#[error("The `_vectors.{subfield}` field in the document with id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")]
InvalidVectorsType { document_id: Value, value: Value, subfield: String },
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
InvalidVectorsMapType { document_id: String, value: Value },
#[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]
InvalidVectorsEmbedderConf { document_id: String, error: deserr::errors::JsonError },
InvalidVectorsMapType { document_id: Value, value: Value },
#[error("{0}")]
InvalidFilter(String),
#[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
@ -136,17 +136,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
}
)]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
#[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
.field,
match .valid_fields.is_empty() {
true => "This index does not have configured filterable attributes.".to_string(),
false => format!("Available filterable attributes are: `{}{}`.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
),
}
)]
InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
#[error("Attribute `{}` is not facet-searchable. {}",
.field,
match .valid_fields.is_empty() {
@ -283,9 +272,8 @@ impl From<arroy::Error> for Error {
arroy::Error::DatabaseFull
| arroy::Error::InvalidItemAppend
| arroy::Error::UnmatchingDistance { .. }
| arroy::Error::NeedBuild(_)
| arroy::Error::MissingKey { .. }
| arroy::Error::MissingMetadata(_) => {
| arroy::Error::MissingNode
| arroy::Error::MissingMetadata => {
Error::InternalError(InternalError::ArroyError(value))
}
}
@ -439,6 +427,7 @@ impl From<HeedError> for Error {
// TODO use the encoding
HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
HeedError::DatabaseClosing => InternalError(DatabaseClosing),
HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
}

View File

@ -1,55 +0,0 @@
//! The fieldids weights map is in charge of storing the link between the searchable fields and their weights.
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
use crate::{FieldId, FieldsIdsMap, Weight};
/// Maps field ids to the weight of the corresponding field.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct FieldidsWeightsMap {
/// Weight currently associated with each known field id.
map: HashMap<FieldId, Weight>,
}
impl FieldidsWeightsMap {
/// Insert a field id -> weigth into the map.
/// If the map did not have this key present, `None` is returned.
/// If the map did have this key present, the value is updated, and the old value is returned.
pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option<Weight> {
self.map.insert(fid, weight)
}
/// Create the map from the fields ids maps.
/// Should only be called in the case there are NO searchable attributes.
/// All the fields will be inserted in the order of the fields ids map with a weight of 0.
pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self {
FieldidsWeightsMap {
map: fid_map
.iter()
.filter(|(_fid, name)| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME))
.map(|(fid, _name)| (fid, 0))
.collect(),
}
}
/// Removes a field id from the map, returning the associated weight previously in the map.
pub fn remove(&mut self, fid: FieldId) -> Option<Weight> {
self.map.remove(&fid)
}
/// Returns weight corresponding to the key.
pub fn weight(&self, fid: FieldId) -> Option<Weight> {
self.map.get(&fid).copied()
}
/// Returns highest weight contained in the map if any.
pub fn max_weight(&self) -> Option<Weight> {
self.map.values().copied().max()
}
/// Return an iterator visiting all field ids in arbitrary order.
pub fn ids(&self) -> impl Iterator<Item = FieldId> + '_ {
self.map.keys().copied()
}
}

View File

@ -41,16 +41,6 @@ impl FieldsIdsMap {
}
}
/// Returns the id of the field called `name` together with the ids of all the
/// fields nested under it.
pub fn nested_ids(&self, name: &str) -> Vec<FieldId> {
    let mut ids = Vec::new();
    for (key, id) in self.names_ids.range(name.to_string()..) {
        // The scan starts at `name`; the first key that no longer starts with
        // `name` ends it.
        if !key.starts_with(name) {
            break;
        }
        // Sharing a prefix is not enough (e.g. `doggolution` vs `doggo`).
        if crate::is_faceted_by(key, name) {
            ids.push(*id);
        }
    }
    ids
}
/// Get the id of a field based on its name.
pub fn id(&self, name: &str) -> Option<FieldId> {
self.names_ids.get(name).copied()
@ -136,32 +126,4 @@ mod tests {
assert_eq!(iter.next(), Some((3, "title")));
assert_eq!(iter.next(), None);
}
// Checks that `nested_ids` returns a field and its nested fields only.
#[test]
fn nested_fields() {
let mut map = FieldsIdsMap::new();
assert_eq!(map.insert("id"), Some(0));
assert_eq!(map.insert("doggo"), Some(1));
assert_eq!(map.insert("doggo.name"), Some(2));
assert_eq!(map.insert("doggolution"), Some(3));
assert_eq!(map.insert("doggo.breed.name"), Some(4));
assert_eq!(map.insert("description"), Some(5));
// `doggolution` (id 3) shares the `doggo` prefix but must not be returned.
// The expected ids follow the ordering of the field names, not the insertion order.
insta::assert_debug_snapshot!(map.nested_ids("doggo"), @r###"
[
1,
4,
2,
]
"###);
// `doggo.breed` was never inserted itself, only the field nested under it is returned.
insta::assert_debug_snapshot!(map.nested_ids("doggo.breed"), @r###"
[
4,
]
"###);
// No inserted field matches `_vector`, so nothing is returned.
insta::assert_debug_snapshot!(map.nested_ids("_vector"), @"[]");
}
}

View File

@ -47,12 +47,6 @@ pub struct FacetGroupValue {
pub bitmap: RoaringBitmap,
}
/// A facet group value whose bitmap is kept as borrowed, still-serialized bytes.
#[derive(Debug)]
pub struct FacetGroupLazyValue<'b> {
/// First byte of the encoded value.
// NOTE(review): presumably the number of elements in the group — confirm against the encoder.
pub size: u8,
/// The bytes following the size byte, left undecoded.
pub bitmap_bytes: &'b [u8],
}
pub struct FacetGroupKeyCodec<T> {
_phantom: PhantomData<T>,
}
@ -75,7 +69,6 @@ where
Ok(Cow::Owned(v))
}
}
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
where
T: BytesDecode<'a>,
@ -91,7 +84,6 @@ where
}
pub struct FacetGroupValueCodec;
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
type EItem = FacetGroupValue;
@ -101,23 +93,11 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
Ok(Cow::Owned(v))
}
}
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
    type DItem = FacetGroupValue;

    /// Decodes a facet group value: the first byte is the group size and the
    /// remaining bytes are a serialized `CboRoaringBitmap`.
    ///
    /// # Errors
    ///
    /// Returns an error when `bytes` is empty (instead of panicking on the
    /// out-of-bounds access) or when the bitmap cannot be deserialized.
    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        // `split_first` yields `None` on an empty slice, where indexing would panic.
        let (&size, bitmap_bytes) = bytes
            .split_first()
            .ok_or("cannot decode a facet group value from an empty byte slice")?;
        let bitmap = CboRoaringBitmapCodec::deserialize_from(bitmap_bytes)?;
        Ok(FacetGroupValue { size, bitmap })
    }
}
/// Codec that decodes a facet group value without deserializing its bitmap.
pub struct FacetGroupLazyValueCodec;

impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec {
    type DItem = FacetGroupLazyValue<'a>;

    /// Splits `bytes` into the leading size byte and the still-serialized bitmap bytes.
    ///
    /// # Errors
    ///
    /// Returns an error when `bytes` is empty instead of panicking on the
    /// out-of-bounds access.
    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        // `split_first` yields `None` on an empty slice, where indexing would panic.
        let (&size, bitmap_bytes) = bytes
            .split_first()
            .ok_or("cannot decode a lazy facet group value from an empty byte slice")?;
        Ok(FacetGroupLazyValue { size, bitmap_bytes })
    }
}

View File

@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::io::{self, Cursor};
use std::io;
use std::mem::size_of;
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
@ -57,24 +57,6 @@ impl CboRoaringBitmapCodec {
}
}
/// Computes the intersection between a serialized bitmap (`bytes`) and `other`.
///
/// Inputs of at most `THRESHOLD` `u32`s are read as a plain sequence of
/// native-endian integers; larger inputs are handed to the serialized
/// intersection of `RoaringBitmap`.
pub fn intersection_with_serialized(
    mut bytes: &[u8],
    other: &RoaringBitmap,
) -> io::Result<RoaringBitmap> {
    // See above `deserialize_from` method for implementation details.
    if bytes.len() > THRESHOLD * size_of::<u32>() {
        return other.intersection_with_serialized_unchecked(Cursor::new(bytes));
    }

    let mut intersection = RoaringBitmap::new();
    // Reading stops at the first failed read, i.e. once the bytes are exhausted.
    while let Ok(value) = bytes.read_u32::<NativeEndian>() {
        if other.contains(value) {
            intersection.insert(value);
        }
    }
    Ok(intersection)
}
/// Merge serialized CboRoaringBitmaps in a buffer.
///
/// if the merged values length is under the threshold, values are directly
@ -213,7 +195,7 @@ mod tests {
fn merge_cbo_roaring_bitmaps() {
let mut buffer = Vec::new();
let small_data = [
let small_data = vec![
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
RoaringBitmap::from_sorted_iter(4..6).unwrap(),
@ -227,7 +209,7 @@ mod tests {
let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap();
assert_eq!(bitmap, expected);
let medium_data = [
let medium_data = vec![
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
RoaringBitmap::from_sorted_iter(4..8).unwrap(),

View File

@ -1,6 +1,5 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::path::Path;
@ -9,7 +8,6 @@ use heed::types::*;
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use roaring::RoaringBitmap;
use rstar::RTree;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use crate::documents::PrimaryKey;
@ -24,13 +22,11 @@ use crate::heed_codec::{
};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
use crate::vector::{Embedding, EmbeddingConfig};
use crate::vector::EmbeddingConfig;
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@ -46,7 +42,6 @@ pub mod main_key {
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map";
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
pub const GEO_RTREE_KEY: &str = "geo-rtree";
pub const PRIMARY_KEY_KEY: &str = "primary-key";
@ -186,7 +181,7 @@ impl Index {
options.max_dbs(25);
let env = unsafe { options.open(path) }?;
let env = options.open(path)?;
let mut wtxn = env.write_txn()?;
let main = env.database_options().name(MAIN).create(&mut wtxn)?;
let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
@ -296,11 +291,6 @@ impl Index {
self.env.read_txn()
}
/// Create a static read transaction to be able to read the index without keeping a reference to it.
pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static>> {
// The transaction is opened from a clone of the environment rather than from `self.env` itself.
self.env.clone().static_read_txn()
}
/// Returns the canonicalized path where the heed `Env` of this `Index` lives.
pub fn path(&self) -> &Path {
self.env.path()
@ -424,65 +414,6 @@ impl Index {
.unwrap_or_default())
}
/* fieldids weights map */
// This maps the field ids to their weights.
// The weights are defined by the ordering of the searchable attributes.

/// Stores the fieldids weights map, which associates the field ids to their
/// weights, in the main database.
pub(crate) fn put_fieldids_weights_map(
    &self,
    wtxn: &mut RwTxn,
    map: &FieldidsWeightsMap,
) -> heed::Result<()> {
    let main = self.main.remap_types::<Str, SerdeJson<FieldidsWeightsMap>>();
    main.put(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY, map)
}
/// Returns the fieldids weights map, which associates the field ids to their weights.
///
/// When no map has been stored, one is built from the fields ids map instead.
pub fn fieldids_weights_map(&self, rtxn: &RoTxn) -> heed::Result<FieldidsWeightsMap> {
    let stored: Option<FieldidsWeightsMap> = self
        .main
        .remap_types::<Str, SerdeJson<_>>()
        .get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)?;

    match stored {
        Some(weights) => Ok(weights),
        None => {
            let fields_ids_map = self.fields_ids_map(rtxn)?;
            Ok(FieldidsWeightsMap::from_field_id_map_without_searchable(&fields_ids_map))
        }
    }
}
/// Deletes the fieldids weights map entry stored under `main_key::FIELDIDS_WEIGHTS_MAP_KEY`
/// in the main database.
// NOTE(review): the returned `bool` presumably tells whether an entry was actually removed — confirm against heed.
pub fn delete_fieldids_weights_map(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)
}
/// Returns every searchable field along with its field id and its weight.
///
/// # Errors
///
/// Fails when a searchable field has no id in the fields ids map, or when its
/// id has no entry in the fieldids weights map.
pub fn searchable_fields_and_weights<'a>(
    &self,
    rtxn: &'a RoTxn,
) -> Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
    let fid_map = self.fields_ids_map(rtxn)?;
    let weight_map = self.fieldids_weights_map(rtxn)?;
    let searchable = self.searchable_fields(rtxn)?;

    let mut fields = Vec::new();
    for field in searchable {
        let fid = fid_map.id(&field).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
            field_name: field.to_string(),
            process: "searchable_fields_and_weights",
        })?;
        let weight = weight_map
            .weight(fid)
            .ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
        fields.push((field, fid, weight));
    }
    Ok(fields)
}
/* geo rtree */
/// Writes the provided `rtree` which associates coordinates to documents ids.
@ -646,45 +577,34 @@ impl Index {
&self,
wtxn: &mut RwTxn,
user_fields: &[&str],
non_searchable_fields_ids: &[FieldId],
fields_ids_map: &FieldsIdsMap,
) -> Result<()> {
) -> heed::Result<()> {
// We can write the user defined searchable fields as-is.
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
let mut weights = FieldidsWeightsMap::default();
// Now we generate the real searchable fields:
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
// 2. Iterate over the user defined searchable fields.
// 3. If a user defined field is a subset of a field defined in the fields_ids_map
// (ie doggo.name is a subset of doggo) right after doggo and with the same weight.
let mut real_fields = Vec::new();
// (ie doggo.name is a subset of doggo) then we push it at the end of the fields.
let mut real_fields = user_fields.to_vec();
for (id, field_from_map) in fields_ids_map.iter() {
for (weight, user_field) in user_fields.iter().enumerate() {
for field_from_map in fields_ids_map.names() {
for user_field in user_fields {
if crate::is_faceted_by(field_from_map, user_field)
&& !real_fields.contains(&field_from_map)
&& !non_searchable_fields_ids.contains(&id)
&& !user_fields.contains(&field_from_map)
{
real_fields.push(field_from_map);
let weight: u16 =
weight.try_into().map_err(|_| UserError::AttributeLimitReached)?;
weights.insert(id, weight);
}
}
}
self.put_searchable_fields(wtxn, &real_fields)?;
self.put_fieldids_weights_map(wtxn, &weights)?;
Ok(())
self.put_searchable_fields(wtxn, &real_fields)
}
pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
let did_delete_searchable = self.delete_searchable_fields(wtxn)?;
let did_delete_user_defined = self.delete_user_defined_searchable_fields(wtxn)?;
self.delete_fieldids_weights_map(wtxn)?;
Ok(did_delete_searchable || did_delete_user_defined)
}
@ -703,32 +623,28 @@ impl Index {
}
/// Returns the searchable fields, those are the fields that are indexed,
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Vec<Cow<'t, str>>> {
/// if the searchable fields aren't there it means that **all** the fields are indexed.
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
self.main
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)?
.map(|fields| Ok(fields.into_iter().map(Cow::Borrowed).collect()))
.unwrap_or_else(|| {
Ok(self
.fields_ids_map(rtxn)?
.names()
.filter(|name| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME))
.map(|field| Cow::Owned(field.to_string()))
.collect())
})
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
}
/// Identical to `searchable_fields`, but returns the ids instead.
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Vec<FieldId>> {
let fields = self.searchable_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
let mut fields_ids = Vec::new();
for name in fields {
if let Some(field_id) = fields_ids_map.id(&name) {
fields_ids.push(field_id);
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> {
match self.searchable_fields(rtxn)? {
Some(fields) => {
let fields_ids_map = self.fields_ids_map(rtxn)?;
let mut fields_ids = Vec::new();
for name in fields {
if let Some(field_id) = fields_ids_map.id(name) {
fields_ids.push(field_id);
}
}
Ok(Some(fields_ids))
}
None => Ok(None),
}
Ok(fields_ids)
}
/// Writes the searchable fields, when this list is specified, only these are indexed.
@ -1573,16 +1489,12 @@ impl Index {
Ok(script_language)
}
/// Put the embedding configs:
/// 1. The name of the embedder
/// 2. The configuration option for this embedder
/// 3. The list of documents with a user provided embedding
pub(crate) fn put_embedding_configs(
&self,
wtxn: &mut RwTxn<'_>,
configs: Vec<IndexEmbeddingConfig>,
configs: Vec<(String, EmbeddingConfig)>,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>().put(
self.main.remap_types::<Str, SerdeJson<Vec<(String, EmbeddingConfig)>>>().put(
wtxn,
main_key::EMBEDDING_CONFIGS,
&configs,
@ -1593,30 +1505,17 @@ impl Index {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::EMBEDDING_CONFIGS)
}
pub fn embedding_configs(&self, rtxn: &RoTxn<'_>) -> Result<Vec<IndexEmbeddingConfig>> {
pub fn embedding_configs(
&self,
rtxn: &RoTxn<'_>,
) -> Result<Vec<(String, crate::vector::EmbeddingConfig)>> {
Ok(self
.main
.remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>()
.remap_types::<Str, SerdeJson<Vec<(String, EmbeddingConfig)>>>()
.get(rtxn, main_key::EMBEDDING_CONFIGS)?
.unwrap_or_default())
}
/// Returns an iterator over the arroy readers of the embedder identified by `embedder_id`.
///
/// One reader is opened for each key produced by `arroy_db_range_for_embedder`.
/// The iteration stops at the first key for which opening fails with
/// `arroy::Error::MissingMetadata`; any other opening error is yielded as an `Err` item.
pub fn arroy_readers<'a>(
&'a self,
rtxn: &'a RoTxn<'a>,
embedder_id: u8,
) -> impl Iterator<Item = Result<arroy::Reader<arroy::distances::Angular>>> + 'a {
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
arroy::Reader::open(rtxn, k, self.vector_arroy)
.map(Some)
// A missing metadata entry becomes `Ok(None)`, which `transpose` turns into
// `None` and therefore ends the `map_while`.
.or_else(|e| match e {
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e.into()),
})
.transpose()
})
}
pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
}
@ -1628,51 +1527,6 @@ impl Index {
pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCH_CUTOFF)
}
/// Returns the embeddings stored for the document `docid`, grouped by embedder name.
///
/// Embedders for which no embedding was found are absent from the returned map.
///
/// # Errors
///
/// Fails when iterating over the embedder categories fails, when a reader cannot be
/// opened for a reason other than `arroy::Error::MissingMetadata`, or when reading
/// the vector of the document fails.
pub fn embeddings(
&self,
rtxn: &RoTxn<'_>,
docid: DocumentId,
) -> Result<BTreeMap<String, Vec<Embedding>>> {
let mut res = BTreeMap::new();
for row in self.embedder_category_id.iter(rtxn)? {
let (embedder_name, embedder_id) = row?;
// The embedder id forms the high byte of the arroy index, the low byte is the
// position `i` probed below.
let embedder_id = (embedder_id as u16) << 8;
let mut embeddings = Vec::new();
'vectors: for i in 0..=u8::MAX {
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
.map(Some)
// A missing metadata entry is not an error here: it becomes `None` after `transpose`.
.or_else(|e| match e {
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e),
})
.transpose();
// No reader at this position: stop probing for this embedder.
let Some(reader) = reader else {
break 'vectors;
};
let embedding = reader?.item_vector(rtxn, docid)?;
if let Some(embedding) = embedding {
embeddings.push(embedding)
} else {
// The document has no vector at this position: stop probing for this embedder.
break 'vectors;
}
}
if !embeddings.is_empty() {
res.insert(embedder_name.to_owned(), embeddings);
}
}
Ok(res)
}
}
/// An embedder configuration as stored in the index.
#[derive(Debug, Deserialize, Serialize)]
pub struct IndexEmbeddingConfig {
/// The name of the embedder.
pub name: String,
/// The configuration option for this embedder.
pub config: EmbeddingConfig,
/// The list of documents with a user provided embedding.
pub user_provided: RoaringBitmap,
}
#[cfg(test)]
@ -1682,17 +1536,15 @@ pub(crate) mod tests {
use big_s::S;
use heed::{EnvOpenOptions, RwTxn};
use maplit::{btreemap, hashset};
use maplit::hashset;
use tempfile::TempDir;
use crate::documents::DocumentsBatchReader;
use crate::error::{Error, InternalError};
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::update::{
self, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
Settings,
self, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
};
use crate::vector::settings::{EmbedderSource, EmbeddingSettings};
use crate::{db_snap, obkv_to_json, Filter, Index, Search, SearchResult};
pub(crate) struct TempIndex {
@ -1858,14 +1710,10 @@ pub(crate) mod tests {
]))
.unwrap();
db_snap!(index, field_distribution, @r###"
age 1 |
id 2 |
name 2 |
"###);
db_snap!(index, field_distribution, 1);
db_snap!(index, word_docids,
@r###"
@r###"
1 [0, ]
2 [1, ]
20 [1, ]
@ -1874,6 +1722,18 @@ pub(crate) mod tests {
"###
);
db_snap!(index, field_distribution);
db_snap!(index, field_distribution,
@r###"
age 1 |
id 2 |
name 2 |
"###
);
// snapshot_index!(&index, "1", include: "^field_distribution$");
// we add all the documents a second time. we are supposed to get the same
// field_distribution in the end
index
@ -1960,7 +1820,7 @@ pub(crate) mod tests {
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
let real = index.searchable_fields(&rtxn).unwrap();
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
@ -1980,7 +1840,7 @@ pub(crate) mod tests {
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
let real = index.searchable_fields(&rtxn).unwrap();
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(real, &["doggo", "name"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(user_defined, &["doggo", "name"]);
@ -1996,7 +1856,7 @@ pub(crate) mod tests {
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
let real = index.searchable_fields(&rtxn).unwrap();
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
@ -2535,14 +2395,6 @@ pub(crate) mod tests {
11 0
4 1
"###);
db_snap!(index, fields_ids_map, @r###"
0 primary_key |
"###);
db_snap!(index, searchable_fields, @r###"["primary_key"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
"###);
index
.add_documents(documents!([
@ -2558,16 +2410,6 @@ pub(crate) mod tests {
11 0
4 1
"###);
db_snap!(index, fields_ids_map, @r###"
0 primary_key |
1 a |
"###);
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
"###);
index.delete_documents(Default::default());
@ -2578,16 +2420,6 @@ pub(crate) mod tests {
11 0
4 1
"###);
db_snap!(index, fields_ids_map, @r###"
0 primary_key |
1 a |
"###);
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
"###);
index
.add_documents(documents!([
@ -2603,16 +2435,6 @@ pub(crate) mod tests {
11 0
4 1
"###);
db_snap!(index, fields_ids_map, @r###"
0 primary_key |
1 a |
"###);
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
"###);
let rtxn = index.read_txn().unwrap();
let search = Search::new(&rtxn, &index);
@ -2698,195 +2520,4 @@ pub(crate) mod tests {
db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
}
// Checks that extending the searchable attributes after documents were indexed keeps
// the fields ids map untouched and gives each searchable field a weight matching its
// position in the searchable list.
#[test]
fn swapping_searchable_attributes() {
// See https://github.com/meilisearch/meilisearch/issues/4484
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name")]);
settings.set_filterable_fields(HashSet::from([S("age")]));
})
.unwrap();
index
.add_documents(documents!({ "id": 1, "name": "Many", "age": 28, "realName": "Maxime" }))
.unwrap();
db_snap!(index, fields_ids_map, @r###"
0 name |
1 id |
2 age |
3 realName |
"###);
db_snap!(index, searchable_fields, @r###"["name"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
"###);
// Add `realName` as a second searchable attribute.
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name"), S("realName")]);
settings.set_filterable_fields(HashSet::from([S("age")]));
})
.unwrap();
// The order of the field id map shouldn't change
db_snap!(index, fields_ids_map, @r###"
0 name |
1 id |
2 age |
3 realName |
"###);
db_snap!(index, searchable_fields, @r###"["name", "realName"]"###);
// `realName` keeps its field id (3) and receives the weight 1.
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
3 1 |
"###);
}
/// Checks that reversing the order of the searchable attributes reverses the order of the
/// documents returned for a query that matches a different attribute in each document.
#[test]
fn attribute_weights_after_swapping_searchable_attributes() {
// See https://github.com/meilisearch/meilisearch/issues/4484
let index = TempIndex::new();
// `name` is listed before `beverage` in the searchable attributes.
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name"), S("beverage")]);
})
.unwrap();
// "kefir" appears in `name` for document 0 and in `beverage` for document 1.
index
.add_documents(documents!([
{ "id": 0, "name": "kefir", "beverage": "water" },
{ "id": 1, "name": "tamo", "beverage": "kefir" }
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let results = search.query("kefir").execute().unwrap();
// We should find kefir the dog first
insta::assert_debug_snapshot!(results.documents_ids, @r###"
[
0,
1,
]
"###);
// Swap the two searchable attributes: `beverage` now comes before `name`.
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("beverage"), S("name")]);
})
.unwrap();
// Open a new read transaction after the settings update before searching again.
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let results = search.query("kefir").execute().unwrap();
// We should find tamo first
insta::assert_debug_snapshot!(results.documents_ids, @r###"
[
1,
0,
]
"###);
}
/// Checks that the content of `_vectors` can neither be searched nor filtered on, whether
/// the settings are left at their defaults, `_vectors` is explicitly declared searchable and
/// filterable, or a user-provided embedder is configured.
#[test]
fn vectors_are_never_indexed_as_searchable_or_filterable() {
let index = TempIndex::new();
// Step 1: default settings, documents that only carry an id and a `_vectors.doggo` entry.
index
.add_documents(documents!([
{ "id": 0, "_vectors": { "doggo": [2345] } },
{ "id": 1, "_vectors": { "doggo": [6789] } },
]))
.unwrap();
// The vector fields are expected in the fields-ids map...
db_snap!(index, fields_ids_map, @r###"
0 id |
1 _vectors |
2 _vectors.doggo |
"###);
// ...but only `id` is expected to be searchable and to carry a weight.
db_snap!(index, searchable_fields, @r###"["id"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
"###);
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
// Querying a value stored in a vector must not match any document.
let results = search.query("2345").execute().unwrap();
assert!(results.candidates.is_empty());
// Release the read transaction before the next settings update.
drop(rtxn);
// Step 2: explicitly ask for the vector fields to be searchable and filterable.
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]);
settings.set_filterable_fields(hashset![S("_vectors"), S("_vectors.doggo")]);
})
.unwrap();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 _vectors |
2 _vectors.doggo |
"###);
// The requested vector fields are expected to be left out: nothing is searchable anymore.
db_snap!(index, searchable_fields, @"[]");
db_snap!(index, fieldids_weights_map, @r###"
fid weight
"###);
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let results = search.query("2345").execute().unwrap();
assert!(results.candidates.is_empty());
// Filtering on a vector value must not match any document either.
let mut search = index.search(&rtxn);
let results = search
.filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
.execute()
.unwrap();
assert!(results.candidates.is_empty());
// Step 3: declare `doggo` as a user-provided embedder with one dimension.
index
.update_settings(|settings| {
settings.set_embedder_settings(btreemap! {
S("doggo") => Setting::Set(EmbeddingSettings {
dimensions: Setting::Set(1),
source: Setting::Set(EmbedderSource::UserProvided),
..EmbeddingSettings::default()}),
});
})
.unwrap();
// Same expectations as in step 2: the maps are unchanged and nothing is searchable.
db_snap!(index, fields_ids_map, @r###"
0 id |
1 _vectors |
2 _vectors.doggo |
"###);
db_snap!(index, searchable_fields, @"[]");
db_snap!(index, fieldids_weights_map, @r###"
fid weight
"###);
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let results = search.query("2345").execute().unwrap();
assert!(results.candidates.is_empty());
let mut search = index.search(&rtxn);
let results = search
.filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
.execute()
.unwrap();
assert!(results.candidates.is_empty());
}
}

View File

@ -28,7 +28,6 @@ pub mod vector;
#[cfg(test)]
#[macro_use]
pub mod snapshot_tests;
mod fieldids_weights_map;
use std::collections::{BTreeMap, HashMap};
use std::convert::{TryFrom, TryInto};
@ -53,7 +52,6 @@ pub use self::error::{
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
};
pub use self::external_documents_ids::ExternalDocumentsIds;
pub use self::fieldids_weights_map::FieldidsWeightsMap;
pub use self::fields_ids_map::FieldsIdsMap;
pub use self::heed_codec::{
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
@ -63,7 +61,6 @@ pub use self::heed_codec::{
};
pub use self::index::Index;
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
pub use self::search::similar::Similar;
pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
@ -80,7 +77,6 @@ pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
pub type FieldDistribution = BTreeMap<String, u64>;
pub type FieldId = u16;
pub type Weight = u16;
pub type Object = serde_json::Map<String, serde_json::Value>;
pub type Position = u32;
pub type RelativePosition = u16;
@ -355,13 +351,43 @@ pub fn is_faceted(field: &str, faceted_fields: impl IntoIterator<Item = impl AsR
/// assert!(!is_faceted_by("animaux.chien", "animaux.chie"));
/// ```
pub fn is_faceted_by(field: &str, facet: &str) -> bool {
    // `field` is faceted by `facet` when it is exactly `facet`, or when it is a sub-field
    // of it, i.e. `facet` immediately followed by a `.` separator. A mere string prefix
    // such as `animaux.chie` for `animaux.chien` does not count.
    //
    // `strip_prefix` yields the remainder only when `facet` is a prefix of `field`, which
    // avoids slicing `field` by hand.
    match field.strip_prefix(facet) {
        Some(rest) => rest.is_empty() || rest.starts_with('.'),
        None => false,
    }
}
/// Produces the normalized form of a facet value: surrounding whitespace is trimmed, the
/// result is passed through `CompatibilityDecompositionNormalizer`, then lowercased.
pub fn normalize_facet(original: &str) -> String {
    let trimmed = original.trim();
    CompatibilityDecompositionNormalizer.normalize_str(trimmed).to_lowercase()
}
/// Represents either a vector or an array of multiple vectors.
///
/// The type is `#[serde(transparent)]`, so it is (de)serialized as its single `inner`
/// field rather than as a wrapping object.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[serde(transparent)]
pub struct VectorOrArrayOfVectors {
// `Left` holds a single vector, `Right` holds a list of vectors, `None` means no value.
// NOTE(review): (de)serialization is delegated to `either::serde_untagged_optional`;
// presumably an absent/null value maps to `None` — confirm against the `either` docs.
#[serde(with = "either::serde_untagged_optional")]
inner: Option<either::Either<Vec<f32>, Vec<Vec<f32>>>>,
}
impl VectorOrArrayOfVectors {
    /// Consumes the value and returns its content as a list of vectors.
    ///
    /// A single vector is wrapped into a one-element list, a list of vectors is returned
    /// as is, and `None` is returned when there is no inner value.
    pub fn into_array_of_vectors(self) -> Option<Vec<Vec<f32>>> {
        self.inner.map(|vector_or_vectors| match vector_or_vectors {
            either::Either::Left(single) => vec![single],
            either::Either::Right(many) => many,
        })
    }
}
/// Rescales `vector` by dividing every component by the vector's Euclidean length.
///
/// When that length is at most `f32::EPSILON` (which includes the empty and the all-zero
/// vector), the input is returned unchanged instead of being divided.
pub fn normalize_vector(mut vector: Vec<f32>) -> Vec<f32> {
    let norm = vector.iter().map(|component| component * component).sum::<f32>().sqrt();

    // Guard clause: nothing sensible to divide by, hand the vector back untouched.
    if norm <= f32::EPSILON {
        return vector;
    }

    for component in &mut vector {
        *component /= norm;
    }
    vector
}
#[cfg(test)]
mod tests {
use serde_json::json;

View File

@ -6,11 +6,9 @@ use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::BytesRefCodec;
use crate::{CboRoaringBitmapCodec, DocumentId};
use crate::DocumentId;
/// Call the given closure on the facet distribution of the candidate documents.
///
@ -33,11 +31,14 @@ pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
let highest_level = get_highest_level(rtxn, db, field_id)?;
let highest_level = get_highest_level(
rtxn,
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
Ok(())
} else {
@ -74,10 +75,13 @@ where
// Represents the list of keys that we must explore.
let mut heap = BinaryHeap::new();
let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
let highest_level = get_highest_level(rtxn, db, field_id)?;
let highest_level = get_highest_level(
rtxn,
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
// We first fill the heap with values from the highest level
let starting_key =
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
@ -88,10 +92,7 @@ where
if key.field_id != field_id {
break;
}
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
let intersection = value.bitmap & candidates;
let count = intersection.len();
if count != 0 {
heap.push(LevelEntry {
@ -120,10 +121,7 @@ where
if key.field_id != field_id {
break;
}
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
let intersection = value.bitmap & candidates;
let count = intersection.len();
if count != 0 {
heap.push(LevelEntry {
@ -148,7 +146,7 @@ where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
callback: CB,
}
@ -173,10 +171,7 @@ where
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
let docids_in_common = value.bitmap & candidates;
if !docids_in_common.is_empty() {
let any_docid_in_common = docids_in_common.min().unwrap();
match (self.callback)(key.left_bound, docids_in_common.len(), any_docid_in_common)?
@ -210,10 +205,7 @@ where
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
let docids_in_common = value.bitmap & candidates;
if !docids_in_common.is_empty() {
let cf = self.iterate(
&docids_in_common,

Some files were not shown because too many files have changed in this diff Show More