Compare commits

..

1 Commits

Author SHA1 Message Date
Tamo
2141cb3b69 add tests on the rest embedder 2024-07-01 12:05:02 +02:00
17 changed files with 544 additions and 155 deletions

View File

@@ -1,6 +1,4 @@
name: Look for flaky tests name: Look for flaky tests
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
on: on:
workflow_dispatch: workflow_dispatch:
schedule: schedule:

View File

@@ -1,6 +1,5 @@
name: Run the indexing fuzzer name: Run the indexing fuzzer
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
on: on:
push: push:
branches: branches:

View File

@@ -15,8 +15,6 @@ jobs:
debian: debian:
name: Publish debian packagge name: Publish debian packagge
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: check-version needs: check-version
container: container:

View File

@@ -35,8 +35,6 @@ jobs:
publish-linux: publish-linux:
name: Publish binary for Linux name: Publish binary for Linux
runs-on: ubuntu-latest runs-on: ubuntu-latest
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
needs: check-version needs: check-version
container: container:
# Use ubuntu-18.04 to compile with glibc 2.27 # Use ubuntu-18.04 to compile with glibc 2.27
@@ -134,8 +132,6 @@ jobs:
name: Publish binary for aarch64 name: Publish binary for aarch64
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: check-version needs: check-version
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
container: container:
# Use ubuntu-18.04 to compile with glibc 2.27 # Use ubuntu-18.04 to compile with glibc 2.27
image: ubuntu:18.04 image: ubuntu:18.04

View File

@@ -21,8 +21,6 @@ jobs:
test-linux: test-linux:
name: Tests on ubuntu-18.04 name: Tests on ubuntu-18.04
runs-on: ubuntu-latest runs-on: ubuntu-latest
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
container: container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
image: ubuntu:18.04 image: ubuntu:18.04
@@ -79,8 +77,6 @@ jobs:
test-all-features: test-all-features:
name: Tests almost all features name: Tests almost all features
runs-on: ubuntu-latest runs-on: ubuntu-latest
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
container: container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
image: ubuntu:18.04 image: ubuntu:18.04
@@ -104,8 +100,6 @@ jobs:
test-disabled-tokenization: test-disabled-tokenization:
name: Test disabled tokenization name: Test disabled tokenization
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
runs-on: ubuntu-latest runs-on: ubuntu-latest
container: container:
image: ubuntu:18.04 image: ubuntu:18.04
@@ -133,8 +127,6 @@ jobs:
# We run tests in debug also, to make sure that the debug_assertions are hit # We run tests in debug also, to make sure that the debug_assertions are hit
test-debug: test-debug:
name: Run tests in debug name: Run tests in debug
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
runs-on: ubuntu-latest runs-on: ubuntu-latest
container: container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations

183
Cargo.lock generated
View File

@@ -55,7 +55,7 @@ dependencies = [
"encoding_rs", "encoding_rs",
"flate2", "flate2",
"futures-core", "futures-core",
"h2", "h2 0.3.26",
"http 0.2.11", "http 0.2.11",
"httparse", "httparse",
"httpdate", "httpdate",
@@ -456,6 +456,12 @@ dependencies = [
"critical-section", "critical-section",
] ]
[[package]]
name = "atomic-waker"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.2.0" version = "1.2.0"
@@ -503,7 +509,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]] [[package]]
name = "benchmarks" name = "benchmarks"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytes", "bytes",
@@ -648,7 +654,7 @@ dependencies = [
[[package]] [[package]]
name = "build-info" name = "build-info"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"time", "time",
@@ -1338,6 +1344,24 @@ dependencies = [
"syn 2.0.60", "syn 2.0.60",
] ]
[[package]]
name = "deadpool"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb84100978c1c7b37f09ed3ce3e5f843af02c2a2c431bae5b19230dad2c1b490"
dependencies = [
"async-trait",
"deadpool-runtime",
"num_cpus",
"tokio",
]
[[package]]
name = "deadpool-runtime"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b"
[[package]] [[package]]
name = "debugid" name = "debugid"
version = "0.8.0" version = "0.8.0"
@@ -1579,7 +1603,7 @@ dependencies = [
[[package]] [[package]]
name = "dump" name = "dump"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"big_s", "big_s",
@@ -1804,7 +1828,7 @@ dependencies = [
[[package]] [[package]]
name = "file-store" name = "file-store"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"faux", "faux",
"tempfile", "tempfile",
@@ -1827,7 +1851,7 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"insta", "insta",
"nom", "nom",
@@ -1847,7 +1871,7 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@@ -1965,7 +1989,7 @@ dependencies = [
[[package]] [[package]]
name = "fuzzers" name = "fuzzers"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"arbitrary", "arbitrary",
"clap", "clap",
@@ -2213,6 +2237,25 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "h2"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab"
dependencies = [
"atomic-waker",
"bytes",
"fnv",
"futures-core",
"futures-sink",
"http 1.0.0",
"indexmap",
"slab",
"tokio",
"tokio-util",
"tracing",
]
[[package]] [[package]]
name = "half" name = "half"
version = "1.8.2" version = "1.8.2"
@@ -2378,6 +2421,29 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
] ]
[[package]]
name = "http-body"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643"
dependencies = [
"bytes",
"http 1.0.0",
]
[[package]]
name = "http-body-util"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
dependencies = [
"bytes",
"futures-util",
"http 1.0.0",
"http-body 1.0.0",
"pin-project-lite",
]
[[package]] [[package]]
name = "httparse" name = "httparse"
version = "1.8.0" version = "1.8.0"
@@ -2400,9 +2466,9 @@ dependencies = [
"futures-channel", "futures-channel",
"futures-core", "futures-core",
"futures-util", "futures-util",
"h2", "h2 0.3.26",
"http 0.2.11", "http 0.2.11",
"http-body", "http-body 0.4.5",
"httparse", "httparse",
"httpdate", "httpdate",
"itoa", "itoa",
@@ -2414,6 +2480,27 @@ dependencies = [
"want", "want",
] ]
[[package]]
name = "hyper"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"h2 0.4.5",
"http 1.0.0",
"http-body 1.0.0",
"httparse",
"httpdate",
"itoa",
"pin-project-lite",
"smallvec",
"tokio",
"want",
]
[[package]] [[package]]
name = "hyper-rustls" name = "hyper-rustls"
version = "0.24.1" version = "0.24.1"
@@ -2422,12 +2509,27 @@ checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97"
dependencies = [ dependencies = [
"futures-util", "futures-util",
"http 0.2.11", "http 0.2.11",
"hyper", "hyper 0.14.27",
"rustls 0.21.12", "rustls 0.21.12",
"tokio", "tokio",
"tokio-rustls", "tokio-rustls",
] ]
[[package]]
name = "hyper-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56"
dependencies = [
"bytes",
"futures-util",
"http 1.0.0",
"http-body 1.0.0",
"hyper 1.3.1",
"pin-project-lite",
"tokio",
]
[[package]] [[package]]
name = "ident_case" name = "ident_case"
version = "1.0.1" version = "1.0.1"
@@ -2452,7 +2554,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]] [[package]]
name = "index-scheduler" name = "index-scheduler"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arroy", "arroy",
@@ -2649,7 +2751,7 @@ dependencies = [
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@@ -3257,7 +3359,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]] [[package]]
name = "meili-snap" name = "meili-snap"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"insta", "insta",
"md5", "md5",
@@ -3266,7 +3368,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch" name = "meilisearch"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"actix-cors", "actix-cors",
"actix-http", "actix-http",
@@ -3352,13 +3454,14 @@ dependencies = [
"urlencoding", "urlencoding",
"uuid", "uuid",
"walkdir", "walkdir",
"wiremock",
"yaup", "yaup",
"zip", "zip",
] ]
[[package]] [[package]]
name = "meilisearch-auth" name = "meilisearch-auth"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"base64 0.21.7", "base64 0.21.7",
"enum-iterator", "enum-iterator",
@@ -3377,7 +3480,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-types" name = "meilisearch-types"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"actix-web", "actix-web",
"anyhow", "anyhow",
@@ -3407,7 +3510,7 @@ dependencies = [
[[package]] [[package]]
name = "meilitool" name = "meilitool"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
@@ -3446,7 +3549,7 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"arroy", "arroy",
"big_s", "big_s",
@@ -3886,7 +3989,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]] [[package]]
name = "permissive-json-pointer" name = "permissive-json-pointer"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"big_s", "big_s",
"serde_json", "serde_json",
@@ -4348,10 +4451,10 @@ dependencies = [
"encoding_rs", "encoding_rs",
"futures-core", "futures-core",
"futures-util", "futures-util",
"h2", "h2 0.3.26",
"http 0.2.11", "http 0.2.11",
"http-body", "http-body 0.4.5",
"hyper", "hyper 0.14.27",
"hyper-rustls", "hyper-rustls",
"ipnet", "ipnet",
"js-sys", "js-sys",
@@ -5098,9 +5201,9 @@ dependencies = [
[[package]] [[package]]
name = "time" name = "time"
version = "0.3.36" version = "0.3.34"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749"
dependencies = [ dependencies = [
"deranged", "deranged",
"itoa", "itoa",
@@ -5121,9 +5224,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]] [[package]]
name = "time-macros" name = "time-macros"
version = "0.2.18" version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774"
dependencies = [ dependencies = [
"num-conv", "num-conv",
"time-core", "time-core",
@@ -6031,6 +6134,30 @@ dependencies = [
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
[[package]]
name = "wiremock"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec874e1eef0df2dcac546057fe5e29186f09c378181cd7b635b4b7bcc98e9d81"
dependencies = [
"assert-json-diff",
"async-trait",
"base64 0.21.7",
"deadpool",
"futures",
"http 1.0.0",
"http-body-util",
"hyper 1.3.1",
"hyper-util",
"log",
"once_cell",
"regex",
"serde",
"serde_json",
"tokio",
"url",
]
[[package]] [[package]]
name = "xattr" name = "xattr"
version = "1.0.1" version = "1.0.1"
@@ -6042,7 +6169,7 @@ dependencies = [
[[package]] [[package]]
name = "xtask" name = "xtask"
version = "1.9.1" version = "1.9.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"build-info", "build-info",

View File

@@ -22,7 +22,7 @@ members = [
] ]
[workspace.package] [workspace.package]
version = "1.9.1" version = "1.9.0"
authors = [ authors = [
"Quentin de Quelen <quentin@dequelen.me>", "Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>", "Clément Renault <clement@meilisearch.com>",

View File

@@ -118,6 +118,7 @@ maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6" temp-env = "0.3.6"
urlencoding = "2.1.3" urlencoding = "2.1.3"
wiremock = "0.6.0"
yaup = "0.2.1" yaup = "0.2.1"
[build-dependencies] [build-dependencies]

View File

@@ -65,7 +65,14 @@ impl Display for Value {
write!( write!(
f, f,
"{}", "{}",
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" }) json_string!(self, {
".enqueuedAt" => "[date]",
".startedAt" => "[date]",
".finishedAt" => "[date]",
".duration" => "[duration]",
".processingTimeMs" => "[duration]",
".details.embedders.*.url" => "[url]"
})
) )
} }
} }

View File

@@ -644,12 +644,7 @@ async fn get_document_with_vectors() {
{ {
"id": 1, "id": 1,
"name": "echo", "name": "echo",
"_vectors": { "_vectors": {}
"manual": {
"embeddings": [],
"regenerate": false
}
}
} }
], ],
"offset": 0, "offset": 0,
@@ -705,12 +700,7 @@ async fn get_document_with_vectors() {
}, },
{ {
"name": "echo", "name": "echo",
"_vectors": { "_vectors": {}
"manual": {
"embeddings": [],
"regenerate": false
}
}
} }
], ],
"offset": 0, "offset": 0,

View File

@@ -150,35 +150,6 @@ async fn simple_search() {
snapshot!(response["semanticHitCount"], @"3"); snapshot!(response["semanticHitCount"], @"3");
} }
/// Checks that `offset` and `limit` are applied to hybrid search results, once with a
/// low `semanticRatio` (0.2) and once with a high one (0.9).
///
/// Both requests ask for exactly one hit starting at offset 1, so each response must
/// contain a single document.
#[actix_rt::test]
async fn limit_offset() {
// First scenario: semanticRatio 0.2 -- the snapshot below reports zero semantic hits.
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"0");
// `limit: 1` must yield exactly one hit.
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
// Second scenario: a brand new server and index, this time with semanticRatio 0.9 --
// the snapshot below reports one semantic hit.
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"1");
// `limit: 1` must yield exactly one hit.
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
}
#[actix_rt::test] #[actix_rt::test]
async fn simple_search_hf() { async fn simple_search_hf() {
let server = Server::new().await; let server = Server::new().await;

View File

@@ -1,3 +1,4 @@
mod rest;
mod settings; mod settings;
use meili_snap::{json_string, snapshot}; use meili_snap::{json_string, snapshot};
@@ -119,12 +120,7 @@ async fn add_remove_user_provided() {
{ {
"id": 1, "id": 1,
"name": "echo", "name": "echo",
"_vectors": { "_vectors": {}
"manual": {
"embeddings": [],
"regenerate": false
}
}
} }
], ],
"offset": 0, "offset": 0,
@@ -146,12 +142,7 @@ async fn add_remove_user_provided() {
{ {
"id": 1, "id": 1,
"name": "echo", "name": "echo",
"_vectors": { "_vectors": {}
"manual": {
"embeddings": [],
"regenerate": false
}
}
} }
], ],
"offset": 0, "offset": 0,
@@ -587,12 +578,7 @@ async fn add_remove_one_vector_4588() {
{ {
"id": 0, "id": 0,
"name": "kefir", "name": "kefir",
"_vectors": { "_vectors": {}
"manual": {
"embeddings": [],
"regenerate": false
}
}
} }
], ],
"offset": 0, "offset": 0,

View File

@@ -0,0 +1,339 @@
use crate::vector::GetAllDocumentsOptions;
use meili_snap::{json_string, snapshot};
use std::sync::atomic::{AtomicUsize, Ordering};
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, Request, ResponseTemplate};
use crate::common::{Server, Value};
use crate::json;
/// Starts a wiremock server standing in for a REST embedder.
///
/// Every `POST /` request is answered with `200` and the JSON body
/// `{ "data": [n, n, n] }`, where `n` is the number of requests this particular mock
/// has already answered (0 for the first request, 1 for the second, ...).
///
/// The counter is owned by the responder closure instead of being a process-wide
/// static: tests in the same binary each get their own sequence starting at 0, so
/// snapshots that embed the counter value do not depend on test ordering or on tests
/// running in parallel.
///
/// Returns the mock server together with embedder settings (`source: "rest"`,
/// 3 dimensions, empty `query`) whose `url` points at it.
async fn create_mock() -> (MockServer, Value) {
    let mock_server = MockServer::start().await;

    // One independent request counter per mock server, moved into the responder.
    let counter = AtomicUsize::new(0);

    Mock::given(method("POST"))
        .and(path("/"))
        .respond_with(move |_req: &Request| {
            let cpt = counter.fetch_add(1, Ordering::Relaxed);
            ResponseTemplate::new(200).set_body_json(json!({ "data": vec![cpt; 3] }))
        })
        .mount(&mock_server)
        .await;
    let url = mock_server.uri();

    let embedder_settings = json!({
        "source": "rest",
        "url": url,
        "dimensions": 3,
        "query": {},
    });

    (mock_server, embedder_settings)
}
/// Sanity check of the mock embedder itself, without involving the search server.
///
/// The mock is only mounted for `POST /`, so the requests are sent as POST (a GET
/// would not match the mounted route). The body is the full JSON document produced by
/// the responder, i.e. `{"data":[n,n,n]}`, with `n` increasing by one per request.
/// The expected values start at 0, which assumes no earlier request has bumped the
/// responder's counter.
#[actix_rt::test]
async fn dummy_testing_the_mock() {
    let (mock, _setting) = create_mock().await;
    let client = reqwest::Client::new();

    let body = client.post(mock.uri()).send().await.unwrap().text().await.unwrap();
    snapshot!(body, @r###"{"data":[0,0,0]}"###);
    let body = client.post(mock.uri()).send().await.unwrap().text().await.unwrap();
    snapshot!(body, @r###"{"data":[1,1,1]}"###);
    let body = client.post(mock.uri()).send().await.unwrap().text().await.unwrap();
    snapshot!(body, @r###"{"data":[2,2,2]}"###);
    let body = client.post(mock.uri()).send().await.unwrap().text().await.unwrap();
    snapshot!(body, @r###"{"data":[3,3,3]}"###);
    let body = client.post(mock.uri()).send().await.unwrap().text().await.unwrap();
    snapshot!(body, @r###"{"data":[4,4,4]}"###);
}
/// Creates a new test server and turns the `vectorStore` feature on.
///
/// The status code and the feature listing returned by the server are both
/// snapshot-checked before the server is handed back to the caller.
async fn get_server_vector() -> Server {
    let server = Server::new().await;

    let (features, status) = server.set_features(json!({"vectorStore": true})).await;
    snapshot!(status, @"200 OK");
    snapshot!(features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);

    server
}
/// Walks through a series of invalid or incomplete `rest` embedder configurations and
/// snapshots how each one is reported.
///
/// The first two updates are rejected immediately with `400 Bad Request`; the
/// following ones are accepted (`202 Accepted`) and the failure only shows up in the
/// resulting task. Task uids are part of the snapshots, so the order of the steps
/// matters.
///
/// NOTE: several error messages embed the body returned by the mock, so the
/// `0`/`1`/`2` payloads below depend on how many requests the mock's counter has seen.
#[actix_rt::test]
async fn bad_settings() {
let (mock, _setting) = create_mock().await;
let server = get_server_vector().await;
let index = server.index("doggo");
// 1. `url` is missing: rejected synchronously.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest" }),
},
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.rest`: Missing field `url` (note: this field is mandatory for source rest)",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// 2. `url` is present but cannot be parsed: rejected synchronously.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": "kefir" }),
},
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.rest.url`: could not parse `kefir`: relative URL without a base",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// 3. Valid url but no `query`: the update is enqueued and the task (uid 0) fails.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri() }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 0,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]"
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with user error: was expected 'input' to be an object in query 'null'.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// 4. With an empty `query`: the mock is reached, but the `embedding` component is
// not found in its `{ "data": [...] }` reply (task uid 1).
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {} }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 1,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]",
"query": {}
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with error: component `embedding` not found in path `embedding` in response: `{\n \"data\": [\n 0,\n 0,\n 0\n ]\n}`.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// 5. Setting only `pathToEmbeddings` to `["data"]`: the task (uid 2) still fails
// looking for the `embedding` component.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {}, "pathToEmbeddings": ["data"] }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]",
"query": {},
"pathToEmbeddings": [
"data"
]
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with error: component `embedding` not found in path `embedding` in response: `{\n \"data\": [\n 1,\n 1,\n 1\n ]\n}`.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// 6. Setting only `embeddingObject` to `["data"]`: the task (uid 3) fails with
// component `data` reported as not found.
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {}, "embeddingObject": ["data"] }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]",
"query": {},
"embeddingObject": [
"data"
]
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with error: component `data` not found in path `data` in response: `{\n \"data\": [\n 2,\n 2,\n 2\n ]\n}`.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Validate an embedder with a bad dimension of 2 instead of 3
// 7. The settings update itself (uid 4) succeeds; the mismatch only surfaces when a
// document is added afterwards (uid 5).
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {}, "pathToEmbeddings": [], "embeddingObject": ["data"], "dimensions": 2 }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let (response, code) = index.add_documents(json!( { "id": 1, "name": "kefir" }), None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
// NOTE(review): this snapshot records a generic "unexpected crash" for the document
// addition rather than a descriptive error -- confirm this is the intended behavior
// to lock in.
snapshot!(task, @r###"
{
"uid": 5,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "An unexpected crash occurred when processing the task.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
/// Configures the mocked `rest` embedder on an index, then adds three documents: two
/// without vectors and one (`id: 1`) carrying a user-provided `_vectors.rest` value,
/// and finally lists all documents with `retrieve_vectors` enabled.
#[actix_rt::test]
async fn add_vector_and_user_provided() {
// `_mock` is bound (not discarded with `_`) so the mock server value stays in scope
// for the whole test.
let (_mock, setting) = create_mock().await;
let server = get_server_vector().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": setting,
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel"},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
// NOTE(review): the inline snapshot is empty, which looks like a placeholder that
// still has to be filled in with the actual task -- confirm.
snapshot!(task, @"");
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
// NOTE(review): three documents were submitted above, yet the snapshot expects an
// empty result set -- presumably also a placeholder (or the addition is expected to
// fail); verify.
snapshot!(json_string!(documents), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
}

View File

@@ -141,6 +141,3 @@ swedish-recomposition = ["charabia/swedish-recomposition"]
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306> # allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"] cuda = ["candle-core/cuda"]
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] }

View File

@@ -1660,7 +1660,9 @@ impl Index {
} }
} }
res.insert(embedder_name.to_owned(), embeddings); if !embeddings.is_empty() {
res.insert(embedder_name.to_owned(), embeddings);
}
} }
Ok(res) Ok(res)
} }

View File

@@ -178,16 +178,16 @@ impl<'a> Search<'a> {
// completely skip semantic search if the results of the keyword search are good enough // completely skip semantic search if the results of the keyword search are good enough
if self.results_good_enough(&keyword_results, semantic_ratio) { if self.results_good_enough(&keyword_results, semantic_ratio) {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); return Ok((keyword_results, Some(0)));
} }
// no vector search against placeholder search // no vector search against placeholder search
let Some(query) = search.query.take() else { let Some(query) = search.query.take() else {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); return Ok((keyword_results, Some(0)));
}; };
// no embedder, no semantic search // no embedder, no semantic search
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else { let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); return Ok((keyword_results, Some(0)));
}; };
let vector_query = match vector { let vector_query = match vector {
@@ -239,44 +239,3 @@ impl<'a> Search<'a> {
true true
} }
} }
/// Applies `offset`/`limit` pagination to a keyword-only [`SearchResult`].
///
/// The first `offset` entries of `documents_ids` and `document_scores` are dropped and
/// at most `limit` of the remaining entries are kept, in their original order. Every
/// other field of the result is passed through untouched.
///
/// If `offset` is past the end of either list, both lists come back empty.
///
/// The second element of the returned tuple is always `Some(0)` (presumably the
/// number of semantic hits, which is zero for keyword-only results -- confirm against
/// the caller).
fn return_keyword_results(
    limit: usize,
    offset: usize,
    SearchResult {
        matching_words,
        candidates,
        mut documents_ids,
        mut document_scores,
        degraded,
        used_negative_operator,
    }: SearchResult,
) -> (SearchResult, Option<u32>) {
    let (documents_ids, document_scores) = if offset >= documents_ids.len() ||
        // technically redundant because documents_ids.len() == document_scores.len(),
        // defensive programming
        offset >= document_scores.len()
    {
        (vec![], vec![])
    } else {
        // Remove the skipped prefix outright. Rotating it to the back and truncating
        // would let the skipped entries reappear at the end of the page whenever
        // `limit` exceeds the number of entries left after `offset`.
        // PANICS: offset < len, so `..offset` is a valid range
        documents_ids.drain(..offset);
        documents_ids.truncate(limit);

        // PANICS: offset < len, so `..offset` is a valid range
        document_scores.drain(..offset);
        document_scores.truncate(limit);
        (documents_ids, document_scores)
    };
    (
        SearchResult {
            matching_words,
            candidates,
            documents_ids,
            document_scores,
            degraded,
            used_negative_operator,
        },
        Some(0),
    )
}

View File

@@ -2180,6 +2180,33 @@ mod tests {
index.add_documents(doc1).unwrap(); index.add_documents(doc1).unwrap();
} }
/// Indexes six documents whose titles use different scripts and languages, then checks
/// which documents the index associates with the `(Script::Cj, Language::Jpn)` and
/// `(Script::Cj, Language::Cmn)` keys.
///
/// Only compiled when the `default` feature is enabled.
#[cfg(feature = "default")]
#[test]
fn store_detected_script_and_language_per_document_during_indexing() {
use charabia::{Language, Script};
let index = TempIndex::new();
index
.add_documents(documents!([
{ "id": 1, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
{ "id": 2, "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
{ "id": 3, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
{ "id": 4, "title": "関西国際空港限定トートバッグ すもももももももものうち" },
{ "id": 5, "title": "ภาษาไทยง่ายนิดเดียว" },
{ "id": 6, "title": "The quick 在尊嚴和權利上一律平等。" },
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let key_jpn = (Script::Cj, Language::Jpn);
let key_cmn = (Script::Cj, Language::Cmn);
let cj_jpn_docs = index.script_language_documents_ids(&rtxn, &key_jpn).unwrap().unwrap();
let cj_cmn_docs = index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap();
// NOTE(review): the expected values look like 0-based internal document ids in
// insertion order rather than the external `id` field (3 -> the document with
// `"id": 4`, 1 and 5 -> the documents with `"id": 2` and `"id": 6`) -- confirm.
let expected_cj_jpn_docids = [3].iter().collect();
assert_eq!(cj_jpn_docs, expected_cj_jpn_docids);
let expected_cj_cmn_docids = [1, 5].iter().collect();
assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
}
#[test] #[test]
fn add_and_delete_documents_in_single_transform() { fn add_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new(); let mut index = TempIndex::new();