mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-22 04:36:32 +00:00
Compare commits
66 Commits
v1.23.0
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fbe56822f0 | ||
|
|
5e1af30b42 | ||
|
|
d298b21a95 | ||
|
|
a4ad87febf | ||
|
|
2caa2be441 | ||
|
|
a829ded023 | ||
|
|
98be43b66b | ||
|
|
3ec4750426 | ||
|
|
d717ec3486 | ||
|
|
0c06bdefac | ||
|
|
62a8133bcd | ||
|
|
c1f7542dfa | ||
|
|
9105e8bb8c | ||
|
|
59e2394e69 | ||
|
|
430bc91c4c | ||
|
|
1172dce093 | ||
|
|
21140a33e6 | ||
|
|
9ed7a81495 | ||
|
|
c8c1d95efd | ||
|
|
10d1e26478 | ||
|
|
cd781d267b | ||
|
|
b16af4a763 | ||
|
|
1514d13ab3 | ||
|
|
d51f13a59e | ||
|
|
6df196034e | ||
|
|
a63762737c | ||
|
|
77394bd4b9 | ||
|
|
cb87201c8b | ||
|
|
1a9c38794f | ||
|
|
34233efb63 | ||
|
|
af0608ebd6 | ||
|
|
8c7e5c094e | ||
|
|
c064737137 | ||
|
|
1d188a7ad3 | ||
|
|
66a6b65716 | ||
|
|
326652a399 | ||
|
|
59316e8d5a | ||
|
|
76d7f20c87 | ||
|
|
380b2797a5 | ||
|
|
1dd58f9bec | ||
|
|
ddc76ad0dc | ||
|
|
ffacf1c002 | ||
|
|
5a49b93b77 | ||
|
|
918a6eaec9 | ||
|
|
1e6ce70e3e | ||
|
|
b418054ee4 | ||
|
|
58f30e9d8a | ||
|
|
c45172a4bf | ||
|
|
221ba20083 | ||
|
|
93c5fbbb8b | ||
|
|
22d529523a | ||
|
|
ed6f479940 | ||
|
|
f19f712433 | ||
|
|
24a92c2809 | ||
|
|
443cc24408 | ||
|
|
e8d5228250 | ||
|
|
5c33fb090c | ||
|
|
48dd9146e7 | ||
|
|
c1c42e818e | ||
|
|
519905ef9c | ||
|
|
f242377d2b | ||
|
|
da06306274 | ||
|
|
b93b803a2e | ||
|
|
cf43ec4aff | ||
|
|
9795d98e77 | ||
|
|
9f4dcd04e9 |
131
Cargo.lock
generated
131
Cargo.lock
generated
@@ -589,7 +589,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bumpalo",
|
||||
@@ -799,7 +799,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "build-info"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"time",
|
||||
@@ -1123,7 +1123,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "159fa412eae48a1d94d0b9ecdb85c97ce56eb2a347c62394d3fdbf221adabc1a"
|
||||
dependencies = [
|
||||
"path-matchers",
|
||||
"path-slash",
|
||||
"path-slash 0.1.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1829,7 +1829,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@@ -2072,7 +2072,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "file-store"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"tempfile",
|
||||
"thiserror 2.0.16",
|
||||
@@ -2094,7 +2094,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"levenshtein_automata",
|
||||
@@ -2122,7 +2122,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -2279,7 +2279,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fuzzers"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"bumpalo",
|
||||
@@ -2758,9 +2758,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hannoy"
|
||||
version = "0.0.9-nested-rtxns"
|
||||
version = "0.0.9-nested-rtxns-2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc5a945b92b063e677d658cfcc7cb6dec2502fe44631f017084938f14d6ce30e"
|
||||
checksum = "06eda090938d9dcd568c8c2a5de383047ed9191578ebf4a342d2975d16e621f2"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
@@ -2838,9 +2838,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "heed"
|
||||
version = "0.22.1-nested-rtxns"
|
||||
version = "0.22.1-nested-rtxns-2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ff115ba5712b1f1fc7617b195f5c2f139e29c397ff79da040cd19db75ccc240"
|
||||
checksum = "a644ab0b1e8234a7c82e83e26b24af1e4a4b8317629fa6ccd9ea2608c1595c73"
|
||||
dependencies = [
|
||||
"bitflags 2.9.4",
|
||||
"byteorder",
|
||||
@@ -3233,7 +3233,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index-scheduler"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"backoff",
|
||||
@@ -3242,6 +3242,7 @@ dependencies = [
|
||||
"bumpalo",
|
||||
"bumparaw-collections",
|
||||
"byte-unit",
|
||||
"bytes",
|
||||
"convert_case 0.8.0",
|
||||
"crossbeam-channel",
|
||||
"csv",
|
||||
@@ -3258,14 +3259,19 @@ dependencies = [
|
||||
"meilisearch-types",
|
||||
"memmap2",
|
||||
"page_size",
|
||||
"path-slash 0.2.1",
|
||||
"rayon",
|
||||
"reqwest",
|
||||
"roaring 0.10.12",
|
||||
"rusty-s3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"synchronoise",
|
||||
"tar",
|
||||
"tempfile",
|
||||
"thiserror 2.0.16",
|
||||
"time",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"ureq",
|
||||
"uuid",
|
||||
@@ -3465,6 +3471,30 @@ dependencies = [
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jiff"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
|
||||
dependencies = [
|
||||
"jiff-static",
|
||||
"log",
|
||||
"portable-atomic",
|
||||
"portable-atomic-util",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jiff-static"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.106",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.34"
|
||||
@@ -3487,7 +3517,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -3988,6 +4018,16 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
||||
|
||||
[[package]]
|
||||
name = "md-5"
|
||||
version = "0.10.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "md5"
|
||||
version = "0.7.0"
|
||||
@@ -3996,7 +4036,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "meili-snap"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"md5",
|
||||
@@ -4007,7 +4047,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-http",
|
||||
@@ -4104,7 +4144,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-auth"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"enum-iterator",
|
||||
@@ -4123,7 +4163,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-types"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@@ -4158,7 +4198,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilitool"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
@@ -4192,7 +4232,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"allocator-api2 0.3.1",
|
||||
"arroy",
|
||||
@@ -4745,6 +4785,12 @@ version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "498a099351efa4becc6a19c72aa9270598e8fd274ca47052e37455241c88b696"
|
||||
|
||||
[[package]]
|
||||
name = "path-slash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42"
|
||||
|
||||
[[package]]
|
||||
name = "pbkdf2"
|
||||
version = "0.12.2"
|
||||
@@ -4773,7 +4819,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
|
||||
|
||||
[[package]]
|
||||
name = "permissive-json-pointer"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"serde_json",
|
||||
@@ -4962,6 +5008,15 @@ version = "1.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic-util"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
|
||||
dependencies = [
|
||||
"portable-atomic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "potential_utf"
|
||||
version = "0.1.3"
|
||||
@@ -5139,6 +5194,16 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.38.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quinn"
|
||||
version = "0.11.9"
|
||||
@@ -5414,6 +5479,7 @@ dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2 0.4.12",
|
||||
"http 1.3.1",
|
||||
"http-body",
|
||||
"http-body-util",
|
||||
@@ -5704,6 +5770,25 @@ version = "1.0.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
||||
|
||||
[[package]]
|
||||
name = "rusty-s3"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fac2edd2f0b56bd79a7343f49afc01c2d41010df480538a510e0abc56044f66c"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"hmac",
|
||||
"jiff",
|
||||
"md-5",
|
||||
"percent-encoding",
|
||||
"quick-xml",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"url",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.20"
|
||||
@@ -6182,7 +6267,7 @@ checksum = "f9c425c07353535ef55b45420f5a8b0a397cd9bc3d7e5236497ca0d90604aa9b"
|
||||
dependencies = [
|
||||
"change-detection",
|
||||
"mime_guess",
|
||||
"path-slash",
|
||||
"path-slash 0.1.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7820,7 +7905,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "xtask"
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"build-info",
|
||||
|
||||
@@ -23,7 +23,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.23.0"
|
||||
version = "1.24.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Compile
|
||||
FROM rust:1.89-alpine3.20 AS compiler
|
||||
FROM rust:1.89-alpine3.22 AS compiler
|
||||
|
||||
RUN apk add -q --no-cache build-base openssl-dev
|
||||
|
||||
@@ -20,7 +20,7 @@ RUN set -eux; \
|
||||
cargo build --release -p meilisearch -p meilitool
|
||||
|
||||
# Run
|
||||
FROM alpine:3.20
|
||||
FROM alpine:3.22
|
||||
LABEL org.opencontainers.image.source="https://github.com/meilisearch/meilisearch"
|
||||
|
||||
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
|
||||
|
||||
@@ -60,7 +60,7 @@ impl FileStore {
|
||||
|
||||
/// Returns the file corresponding to the requested uuid.
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> {
|
||||
let path = self.get_update_path(uuid);
|
||||
let path = self.update_path(uuid);
|
||||
let file = match StdFile::open(path) {
|
||||
Ok(file) => file,
|
||||
Err(e) => {
|
||||
@@ -72,7 +72,7 @@ impl FileStore {
|
||||
}
|
||||
|
||||
/// Returns the path that correspond to this uuid, the path could not exists.
|
||||
pub fn get_update_path(&self, uuid: Uuid) -> PathBuf {
|
||||
pub fn update_path(&self, uuid: Uuid) -> PathBuf {
|
||||
self.path.join(uuid.to_string())
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ license.workspace = true
|
||||
anyhow = "1.0.98"
|
||||
bincode = "1.3.3"
|
||||
byte-unit = "5.1.6"
|
||||
bytes = "1.10.1"
|
||||
bumpalo = "3.18.1"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.8.0"
|
||||
@@ -28,10 +29,12 @@ meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.7"
|
||||
page_size = "0.6.0"
|
||||
path-slash = "0.2.1"
|
||||
rayon = "1.10.0"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
@@ -45,6 +48,9 @@ tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
backoff = "0.4.0"
|
||||
reqwest = { version = "0.12.23", features = ["rustls-tls", "http2"], default-features = false }
|
||||
rusty-s3 = "0.8.1"
|
||||
tokio = { version = "1.47.1", features = ["full"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -199,7 +199,7 @@ impl IndexMapper {
|
||||
let uuid = Uuid::new_v4();
|
||||
self.index_mapping.put(&mut wtxn, name, &uuid)?;
|
||||
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
fs::create_dir_all(&index_path)?;
|
||||
|
||||
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
|
||||
@@ -286,7 +286,7 @@ impl IndexMapper {
|
||||
};
|
||||
|
||||
let index_map = self.index_map.clone();
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
let index_name = name.to_string();
|
||||
thread::Builder::new()
|
||||
.name(String::from("index_deleter"))
|
||||
@@ -408,7 +408,7 @@ impl IndexMapper {
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
// take the lock to reopen the environment.
|
||||
reopen
|
||||
.reopen(&mut self.index_map.write().unwrap(), &index_path)
|
||||
@@ -425,7 +425,7 @@ impl IndexMapper {
|
||||
// if it's not already there.
|
||||
match index_map.get(&uuid) {
|
||||
Missing => {
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
|
||||
break index_map
|
||||
.create(
|
||||
@@ -452,6 +452,14 @@ impl IndexMapper {
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
/// Returns the path of the index.
|
||||
///
|
||||
/// The folder located at this path is containing the data.mdb,
|
||||
/// the lock.mdb and an optional data.mdb.cpy file.
|
||||
pub fn index_path(&self, uuid: Uuid) -> PathBuf {
|
||||
self.base_path.join(uuid.to_string())
|
||||
}
|
||||
|
||||
pub fn rollback_index(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
@@ -492,7 +500,7 @@ impl IndexMapper {
|
||||
};
|
||||
}
|
||||
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
let index_path = self.index_path(uuid);
|
||||
Index::rollback(milli::heed::EnvOpenOptions::new().read_txn_without_tls(), index_path, to)
|
||||
.map_err(|err| crate::Error::from_milli(err, Some(name.to_string())))
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
run_loop_iteration: _,
|
||||
embedders: _,
|
||||
chat_settings: _,
|
||||
runtime: _,
|
||||
} = scheduler;
|
||||
|
||||
let rtxn = env.read_txn().unwrap();
|
||||
|
||||
@@ -216,6 +216,8 @@ pub struct IndexScheduler {
|
||||
/// A counter that is incremented before every call to [`tick`](IndexScheduler::tick)
|
||||
#[cfg(test)]
|
||||
run_loop_iteration: Arc<RwLock<usize>>,
|
||||
|
||||
runtime: Option<tokio::runtime::Handle>,
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
@@ -242,6 +244,7 @@ impl IndexScheduler {
|
||||
run_loop_iteration: self.run_loop_iteration.clone(),
|
||||
features: self.features.clone(),
|
||||
chat_settings: self.chat_settings,
|
||||
runtime: self.runtime.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -260,6 +263,7 @@ impl IndexScheduler {
|
||||
options: IndexSchedulerOptions,
|
||||
auth_env: Env<WithoutTls>,
|
||||
from_db_version: (u32, u32, u32),
|
||||
runtime: Option<tokio::runtime::Handle>,
|
||||
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
|
||||
#[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
|
||||
) -> Result<Self> {
|
||||
@@ -341,6 +345,7 @@ impl IndexScheduler {
|
||||
run_loop_iteration: Arc::new(RwLock::new(0)),
|
||||
features,
|
||||
chat_settings,
|
||||
runtime,
|
||||
};
|
||||
|
||||
this.run();
|
||||
|
||||
@@ -75,6 +75,7 @@ make_enum_progress! {
|
||||
pub enum TaskCancelationProgress {
|
||||
RetrievingTasks,
|
||||
CancelingUpgrade,
|
||||
CleaningCompactionLeftover,
|
||||
UpdatingTasks,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,7 +25,6 @@ enum AutobatchKind {
|
||||
IndexDeletion,
|
||||
IndexUpdate,
|
||||
IndexSwap,
|
||||
IndexCompaction,
|
||||
}
|
||||
|
||||
impl AutobatchKind {
|
||||
@@ -69,14 +68,14 @@ impl From<KindWithContent> for AutobatchKind {
|
||||
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
|
||||
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
|
||||
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
|
||||
KindWithContent::IndexCompaction { .. } => AutobatchKind::IndexCompaction,
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
KindWithContent::IndexCompaction { .. }
|
||||
| KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -120,9 +119,6 @@ pub enum BatchKind {
|
||||
IndexSwap {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexCompaction {
|
||||
id: TaskId,
|
||||
},
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
@@ -188,13 +184,6 @@ impl BatchKind {
|
||||
)),
|
||||
false,
|
||||
),
|
||||
K::IndexCompaction => (
|
||||
Break((
|
||||
BatchKind::IndexCompaction { id: task_id },
|
||||
BatchStopReason::TaskCannotBeBatched { kind, id: task_id },
|
||||
)),
|
||||
false,
|
||||
),
|
||||
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
|
||||
K::DocumentImport { allow_index_creation, primary_key: pk }
|
||||
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
|
||||
@@ -300,7 +289,7 @@ impl BatchKind {
|
||||
|
||||
match (self, autobatch_kind) {
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::IndexCompaction) => {
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => {
|
||||
Break((this, BatchStopReason::TaskCannotBeBatched { kind, id }))
|
||||
},
|
||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||
@@ -497,7 +486,6 @@ impl BatchKind {
|
||||
| BatchKind::IndexDeletion { .. }
|
||||
| BatchKind::IndexUpdate { .. }
|
||||
| BatchKind::IndexSwap { .. }
|
||||
| BatchKind::IndexCompaction { .. }
|
||||
| BatchKind::DocumentEdition { .. },
|
||||
_,
|
||||
) => {
|
||||
|
||||
@@ -437,12 +437,6 @@ impl IndexScheduler {
|
||||
current_batch.processing(Some(&mut task));
|
||||
Ok(Some(Batch::IndexSwap { task }))
|
||||
}
|
||||
BatchKind::IndexCompaction { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
Ok(Some(Batch::IndexCompaction { index_uid, task }))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -525,17 +519,33 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 3. we batch the export.
|
||||
// 3. we get the next task to compact
|
||||
let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued;
|
||||
if let Some(task_id) = to_compact.min() {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
|
||||
kind: Kind::IndexCompaction,
|
||||
id: task_id,
|
||||
});
|
||||
let index_uid =
|
||||
task.index_uid().expect("Compaction task must have an index uid").to_owned();
|
||||
return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the export.
|
||||
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
|
||||
if !to_export.is_empty() {
|
||||
let task_id = to_export.iter().next().expect("There must be at least one export task");
|
||||
let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
|
||||
current_batch.processing([&mut task]);
|
||||
current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export });
|
||||
current_batch
|
||||
.reason(BatchStopReason::TaskCannotBeBatched { kind: Kind::Export, id: task_id });
|
||||
return Ok(Some((Batch::Export { task }, current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the snapshot.
|
||||
// 5. we batch the snapshot.
|
||||
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
|
||||
if !to_snapshot.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
|
||||
@@ -545,7 +555,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 5. we batch the dumps.
|
||||
// 6. we batch the dumps.
|
||||
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
|
||||
if let Some(to_dump) = to_dump.min() {
|
||||
let mut task =
|
||||
@@ -558,7 +568,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::Dump(task), current_batch)));
|
||||
}
|
||||
|
||||
// 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
// 7. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::io::{Seek, SeekFrom};
|
||||
use std::fs::{remove_file, File};
|
||||
use std::io::{ErrorKind, Seek, SeekFrom};
|
||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
@@ -13,7 +14,7 @@ use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status
|
||||
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use milli::update::Settings as MilliSettings;
|
||||
use roaring::RoaringBitmap;
|
||||
use tempfile::PersistError;
|
||||
use tempfile::{PersistError, TempPath};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::create_batch::Batch;
|
||||
@@ -28,6 +29,9 @@ use crate::utils::{
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result, TaskId};
|
||||
|
||||
/// The name of the copy of the data.mdb file used during compaction.
|
||||
const DATA_MDB_COPY_NAME: &str = "data.mdb.cpy";
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ProcessBatchInfo {
|
||||
/// The write channel congestion. None when unavailable: settings update.
|
||||
@@ -558,11 +562,12 @@ impl IndexScheduler {
|
||||
.set_currently_updating_index(Some((index_uid.to_string(), index.clone())));
|
||||
|
||||
progress.update_progress(IndexCompaction::CreateTemporaryFile);
|
||||
let pre_size = std::fs::metadata(index.path().join("data.mdb"))?.len();
|
||||
let mut file = tempfile::Builder::new()
|
||||
.suffix("data.")
|
||||
.prefix(".mdb.cpy")
|
||||
.tempfile_in(index.path())?;
|
||||
let src_path = index.path().join("data.mdb");
|
||||
let pre_size = std::fs::metadata(&src_path)?.len();
|
||||
|
||||
let dst_path = TempPath::from_path(index.path().join(DATA_MDB_COPY_NAME));
|
||||
let file = File::create(&dst_path)?;
|
||||
let mut file = tempfile::NamedTempFile::from_parts(file, dst_path);
|
||||
|
||||
// 3. We copy the index data to the temporary file
|
||||
progress.update_progress(IndexCompaction::CopyAndCompactTheIndex);
|
||||
@@ -574,7 +579,7 @@ impl IndexScheduler {
|
||||
|
||||
// 4. We replace the index data file with the temporary file
|
||||
progress.update_progress(IndexCompaction::PersistTheCompactedIndex);
|
||||
match file.persist(index.path().join("data.mdb")) {
|
||||
match file.persist(src_path) {
|
||||
Ok(file) => file.sync_all()?,
|
||||
// TODO see if we have a _resource busy_ error and probably handle this by:
|
||||
// 1. closing the index, 2. replacing and 3. reopening it
|
||||
@@ -910,9 +915,10 @@ impl IndexScheduler {
|
||||
|
||||
let enqueued_tasks = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
|
||||
|
||||
// 0. Check if any upgrade task was matched.
|
||||
// 0. Check if any upgrade or compaction tasks were matched.
|
||||
// If so, we cancel all the failed or enqueued upgrade tasks.
|
||||
let upgrade_tasks = &self.queue.tasks.get_kind(rtxn, Kind::UpgradeDatabase)?;
|
||||
let compaction_tasks = &self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)?;
|
||||
let is_canceling_upgrade = !matched_tasks.is_disjoint(upgrade_tasks);
|
||||
if is_canceling_upgrade {
|
||||
let failed_tasks = self.queue.tasks.get_status(rtxn, Status::Failed)?;
|
||||
@@ -977,7 +983,33 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
// 3. We now have a list of tasks to cancel, cancel them
|
||||
// 3. If we are cancelling a compaction task, remove the tempfiles after incomplete compactions
|
||||
for compaction_task in &tasks_to_cancel & compaction_tasks {
|
||||
progress.update_progress(TaskCancelationProgress::CleaningCompactionLeftover);
|
||||
let task = self.queue.tasks.get_task(rtxn, compaction_task)?.unwrap();
|
||||
let Some(Details::IndexCompaction {
|
||||
index_uid,
|
||||
pre_compaction_size: _,
|
||||
post_compaction_size: _,
|
||||
}) = task.details
|
||||
else {
|
||||
unreachable!("wrong details for compaction task {compaction_task}")
|
||||
};
|
||||
|
||||
let index_path = match self.index_mapper.index_mapping.get(rtxn, &index_uid)? {
|
||||
Some(index_uuid) => self.index_mapper.index_path(index_uuid),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
if let Err(e) = remove_file(index_path.join(DATA_MDB_COPY_NAME)) {
|
||||
match e.kind() {
|
||||
ErrorKind::NotFound => (),
|
||||
_ => return Err(Error::IoError(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. We now have a list of tasks to cancel, cancel them
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
progress.update_progress(progress_obj);
|
||||
|
||||
|
||||
@@ -1,11 +1,20 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::env::VarError;
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::heed::CompactionOption;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::tasks::{Status, Task};
|
||||
use meilisearch_types::{compression, VERSION_FILE_NAME};
|
||||
use path_slash::PathBufExt;
|
||||
use reqwest::header::ETAG;
|
||||
use reqwest::Client;
|
||||
use rusty_s3::actions::{CreateMultipartUpload, S3Action as _};
|
||||
use rusty_s3::{Bucket, Credentials, UrlStyle};
|
||||
|
||||
use crate::heed::EnvOpenOptions;
|
||||
use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
|
||||
@@ -78,10 +87,73 @@ impl IndexScheduler {
|
||||
pub(super) fn process_snapshot(
|
||||
&self,
|
||||
progress: Progress,
|
||||
mut tasks: Vec<Task>,
|
||||
tasks: Vec<Task>,
|
||||
) -> Result<Vec<Task>> {
|
||||
progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
|
||||
|
||||
const S3_BUCKET_URL: &str = "MEILI_S3_BUCKET_URL";
|
||||
const S3_BUCKET_REGION: &str = "MEILI_S3_BUCKET_REGION";
|
||||
const S3_BUCKET_NAME: &str = "MEILI_S3_BUCKET_NAME";
|
||||
const S3_SNAPSHOT_PREFIX: &str = "MEILI_S3_SNAPSHOT_PREFIX";
|
||||
const S3_ACCESS_KEY: &str = "MEILI_S3_ACCESS_KEY";
|
||||
const S3_SECRET_KEY: &str = "MEILI_S3_SECRET_KEY";
|
||||
|
||||
let bucket_url = std::env::var(S3_BUCKET_URL).map_err(|e| (S3_BUCKET_URL, e));
|
||||
let bucket_region = std::env::var(S3_BUCKET_REGION).map_err(|e| (S3_BUCKET_REGION, e));
|
||||
let bucket_name = std::env::var(S3_BUCKET_NAME).map_err(|e| (S3_BUCKET_NAME, e));
|
||||
let snapshot_prefix =
|
||||
std::env::var(S3_SNAPSHOT_PREFIX).map_err(|e| (S3_SNAPSHOT_PREFIX, e));
|
||||
let access_key = std::env::var(S3_ACCESS_KEY).map_err(|e| (S3_ACCESS_KEY, e));
|
||||
let secret_key = std::env::var(S3_SECRET_KEY).map_err(|e| (S3_SECRET_KEY, e));
|
||||
match (bucket_url, bucket_region, bucket_name, snapshot_prefix, access_key, secret_key) {
|
||||
(
|
||||
Ok(bucket_url),
|
||||
Ok(bucket_region),
|
||||
Ok(bucket_name),
|
||||
Ok(snapshot_prefix),
|
||||
Ok(access_key),
|
||||
Ok(secret_key),
|
||||
) => {
|
||||
let runtime = self.runtime.as_ref().expect("Runtime not initialized");
|
||||
#[cfg(not(unix))]
|
||||
panic!("Non-unix platform does not support S3 snapshotting");
|
||||
#[cfg(unix)]
|
||||
runtime.block_on(self.process_snapshot_to_s3(
|
||||
progress,
|
||||
bucket_url,
|
||||
bucket_region,
|
||||
bucket_name,
|
||||
PathBuf::from_slash(snapshot_prefix),
|
||||
access_key,
|
||||
secret_key,
|
||||
tasks,
|
||||
))
|
||||
}
|
||||
(
|
||||
Err((_, VarError::NotPresent)),
|
||||
Err((_, VarError::NotPresent)),
|
||||
Err((_, VarError::NotPresent)),
|
||||
Err((_, VarError::NotPresent)),
|
||||
Err((_, VarError::NotPresent)),
|
||||
Err((_, VarError::NotPresent)),
|
||||
) => self.process_snapshots_to_disk(progress, tasks),
|
||||
(Err((var, e)), _, _, _, _, _)
|
||||
| (_, Err((var, e)), _, _, _, _)
|
||||
| (_, _, Err((var, e)), _, _, _)
|
||||
| (_, _, _, Err((var, e)), _, _)
|
||||
| (_, _, _, _, Err((var, e)), _)
|
||||
| (_, _, _, _, _, Err((var, e))) => {
|
||||
// TODO: Handle error gracefully
|
||||
panic!("Error while reading environment variables: {}: {}", var, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn process_snapshots_to_disk(
|
||||
&self,
|
||||
progress: Progress,
|
||||
mut tasks: Vec<Task>,
|
||||
) -> Result<Vec<Task>, Error> {
|
||||
fs::create_dir_all(&self.scheduler.snapshots_path)?;
|
||||
let temp_snapshot_dir = tempfile::tempdir()?;
|
||||
|
||||
@@ -140,7 +212,7 @@ impl IndexScheduler {
|
||||
let task =
|
||||
self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Some(content_uuid) = task.content_uuid() {
|
||||
let src = self.queue.file_store.get_update_path(content_uuid);
|
||||
let src = self.queue.file_store.update_path(content_uuid);
|
||||
let dst = update_files_dir.join(content_uuid.to_string());
|
||||
fs::copy(src, dst)?;
|
||||
}
|
||||
@@ -206,4 +278,283 @@ impl IndexScheduler {
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub(super) async fn process_snapshot_to_s3(
|
||||
&self,
|
||||
progress: Progress,
|
||||
bucket_url: String,
|
||||
bucket_region: String,
|
||||
bucket_name: String,
|
||||
snapshot_prefix: PathBuf,
|
||||
access_key: String,
|
||||
secret_key: String,
|
||||
mut tasks: Vec<Task>,
|
||||
) -> Result<Vec<Task>> {
|
||||
use std::fs::File;
|
||||
use std::io::{self, Seek as _, SeekFrom, Write as _};
|
||||
use std::os::fd::OwnedFd;
|
||||
use std::path::Path;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use reqwest::Response;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
const ONE_HOUR: Duration = Duration::from_secs(3600);
|
||||
// default part size is 250MiB
|
||||
// TODO use 375MiB
|
||||
// It must be at least 2x5MiB
|
||||
const PART_SIZE: usize = 10 * 1024 * 1024;
|
||||
|
||||
// The maximum number of parts that can be uploaded in parallel.
|
||||
const S3_MAX_IN_FLIGHT_PARTS: &str = "MEILI_S3_MAX_IN_FLIGHT_PARTS";
|
||||
let max_in_flight_parts: usize = match std::env::var(S3_MAX_IN_FLIGHT_PARTS) {
|
||||
Ok(val) => val.parse().expect("Failed to parse MEILI_S3_MAX_IN_FLIGHT_PARTS"),
|
||||
Err(_) => 10,
|
||||
};
|
||||
|
||||
let client = Client::new();
|
||||
// TODO Remove this unwrap
|
||||
let url = bucket_url.parse().unwrap();
|
||||
let bucket = Bucket::new(url, UrlStyle::Path, bucket_name, bucket_region).unwrap();
|
||||
let credential = Credentials::new(access_key, secret_key);
|
||||
// TODO change this and use the database name like in the original version
|
||||
let object_path = snapshot_prefix.join("data.ms.snapshot");
|
||||
let object = object_path.to_slash().expect("Invalid UTF-8 path").into_owned();
|
||||
|
||||
eprintln!("Starting the upload of the snapshot to {object}");
|
||||
|
||||
// TODO implement exponential backoff on upload requests: https://docs.rs/backoff
|
||||
// TODO return a result with actual errors
|
||||
// TODO sign for longer than an hour?
|
||||
// NOTE to make it work on Windows we could try using std::io::pipe instead.
|
||||
// However, we are still using the tokio unix pipe in the async upload loop.
|
||||
let (reader, writer) = std::io::pipe()?;
|
||||
let uploader_task = tokio::spawn(async move {
|
||||
let reader = OwnedFd::from(reader);
|
||||
let reader = tokio::net::unix::pipe::Receiver::from_owned_fd(reader)?;
|
||||
let action = bucket.create_multipart_upload(Some(&credential), &object);
|
||||
// TODO Question: If it is only signed for an hour and a snapshot takes longer than an hour, what happens?
|
||||
// If the part is deleted (like a TTL) we should sign it for at least 24 hours.
|
||||
let url = action.sign(ONE_HOUR);
|
||||
let resp = client.post(url).send().await.unwrap().error_for_status().unwrap();
|
||||
let body = resp.text().await.unwrap();
|
||||
|
||||
let multipart = CreateMultipartUpload::parse_response(&body).unwrap();
|
||||
let mut etags = Vec::<String>::new();
|
||||
let mut in_flight =
|
||||
VecDeque::<(JoinHandle<reqwest::Result<Response>>, Bytes)>::with_capacity(
|
||||
max_in_flight_parts,
|
||||
);
|
||||
|
||||
for part_number in 1u16.. {
|
||||
let part_upload = bucket.upload_part(
|
||||
Some(&credential),
|
||||
&object,
|
||||
part_number,
|
||||
multipart.upload_id(),
|
||||
);
|
||||
let url = part_upload.sign(ONE_HOUR);
|
||||
|
||||
// Wait for a buffer to be ready if there are in-flight parts that landed
|
||||
let mut buffer = if in_flight.len() >= max_in_flight_parts {
|
||||
let (request, buffer) = in_flight.pop_front().unwrap();
|
||||
let resp = request.await.unwrap().unwrap().error_for_status().unwrap();
|
||||
let etag =
|
||||
resp.headers().get(ETAG).expect("every UploadPart request returns an Etag");
|
||||
let mut buffer = match buffer.try_into_mut() {
|
||||
Ok(buffer) => buffer,
|
||||
Err(_) => panic!("Valid to convert into BytesMut"),
|
||||
};
|
||||
// TODO use bumpalo to reduce the number of allocations
|
||||
etags.push(etag.to_str().unwrap().to_owned());
|
||||
buffer.clear();
|
||||
buffer
|
||||
} else {
|
||||
// TODO Base this on the available memory
|
||||
BytesMut::with_capacity(PART_SIZE)
|
||||
};
|
||||
|
||||
while buffer.len() < (PART_SIZE / 2) {
|
||||
eprintln!(
|
||||
"buffer is {:.2}% full, trying to read more",
|
||||
buffer.len() as f32 / buffer.capacity() as f32 * 100.0
|
||||
);
|
||||
|
||||
// Wait for the pipe to be readable
|
||||
reader.readable().await?;
|
||||
|
||||
match reader.try_read_buf(&mut buffer) {
|
||||
Ok(0) => break,
|
||||
// We read some bytes but maybe not enough
|
||||
Ok(n) => {
|
||||
eprintln!("Read {} bytes from pipe, continuing", n);
|
||||
continue;
|
||||
}
|
||||
// The readiness event is a false positive.
|
||||
Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => {
|
||||
eprintln!("received a WouldBlock");
|
||||
continue;
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!(
|
||||
"buffer is {:.2}% full",
|
||||
buffer.len() as f32 / buffer.capacity() as f32 * 100.0
|
||||
);
|
||||
|
||||
if buffer.is_empty() {
|
||||
eprintln!("buffer is empty, breaking part number loop");
|
||||
// Break the loop if the buffer is empty
|
||||
// after we tried to read bytes
|
||||
break;
|
||||
}
|
||||
|
||||
let body = buffer.freeze();
|
||||
eprintln!("Sending part {}", part_number);
|
||||
let task = tokio::spawn(client.put(url).body(body.clone()).send());
|
||||
in_flight.push_back((task, body));
|
||||
}
|
||||
|
||||
for (join_handle, _buffer) in in_flight {
|
||||
let resp = join_handle.await.unwrap().unwrap().error_for_status().unwrap();
|
||||
let etag =
|
||||
resp.headers().get(ETAG).expect("every UploadPart request returns an Etag");
|
||||
// TODO use bumpalo to reduce the number of allocations
|
||||
etags.push(etag.to_str().unwrap().to_owned());
|
||||
}
|
||||
|
||||
eprintln!("Finalizing the multipart upload");
|
||||
|
||||
let action = bucket.complete_multipart_upload(
|
||||
Some(&credential),
|
||||
&object,
|
||||
multipart.upload_id(),
|
||||
etags.iter().map(AsRef::as_ref),
|
||||
);
|
||||
let url = action.sign(ONE_HOUR);
|
||||
let resp = client.post(url).body(action.body()).send().await.unwrap();
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap();
|
||||
eprintln!("Status: {status}, Text: {text}");
|
||||
|
||||
// TODO do a better check and do not assert
|
||||
// assert!(resp.status().is_success());
|
||||
|
||||
Result::<_, Error>::Ok(())
|
||||
});
|
||||
|
||||
// TODO not a big fan of this clone
|
||||
// remove it and get all the necessary data from the scheduler
|
||||
let index_scheduler = IndexScheduler::private_clone(self);
|
||||
let builder_task = tokio::task::spawn_blocking(move || {
|
||||
// NOTE enabling compression still generates a corrupted tarball
|
||||
let writer = flate2::write::GzEncoder::new(writer, flate2::Compression::none());
|
||||
let mut tarball = tar::Builder::new(writer);
|
||||
|
||||
// 1. Snapshot the version file
|
||||
tarball.append_path_with_name(
|
||||
&index_scheduler.scheduler.version_file_path,
|
||||
VERSION_FILE_NAME,
|
||||
)?;
|
||||
|
||||
// 2. Snapshot the index scheduler LMDB env
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
|
||||
let mut tasks_env_file = index_scheduler.env.try_clone_inner_file()?;
|
||||
// NOTE That made the trick !!! Should I memory map instead?
|
||||
tasks_env_file.seek(SeekFrom::Start(0))?;
|
||||
let path = Path::new("tasks").join("data.mdb");
|
||||
// NOTE when commenting this line, the tarball works better
|
||||
tarball.append_file(path, &mut tasks_env_file)?;
|
||||
drop(tasks_env_file);
|
||||
|
||||
// 2.3 Create a read transaction on the index-scheduler
|
||||
let rtxn = index_scheduler.env.read_txn()?;
|
||||
|
||||
// 2.4 Create the update files directory
|
||||
// And only copy the update files of the enqueued tasks
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
|
||||
let enqueued = index_scheduler.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
|
||||
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
|
||||
progress.update_progress(update_file_progress);
|
||||
// TODO I need to create the update files directory (even if empty)
|
||||
// I should probably simply use the append_dir_all method
|
||||
// but I'll loose the progression.
|
||||
let update_files_dir = Path::new("update_files");
|
||||
for task_id in enqueued {
|
||||
let task = index_scheduler
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&rtxn, task_id)?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Some(content_uuid) = task.content_uuid() {
|
||||
let src = index_scheduler.queue.file_store.update_path(content_uuid);
|
||||
let mut update_file = File::open(src)?;
|
||||
let path = update_files_dir.join(content_uuid.to_string());
|
||||
tarball.append_file(path, &mut update_file)?;
|
||||
}
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 3. Snapshot every indexes
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
|
||||
let index_mapping = index_scheduler.index_mapper.index_mapping;
|
||||
let nb_indexes = index_mapping.len(&rtxn)? as u32;
|
||||
let indexes_dir = Path::new("indexes");
|
||||
let indexes_references: Vec<_> = index_scheduler
|
||||
.index_mapper
|
||||
.index_mapping
|
||||
.iter(&rtxn)?
|
||||
.map(|res| res.map_err(Error::from).map(|(name, uuid)| (name.to_string(), uuid)))
|
||||
.collect::<Result<_, Error>>()?;
|
||||
|
||||
dbg!(&indexes_references);
|
||||
|
||||
// Note that we need to collect and open all of the indexes files because
|
||||
// otherwise, using a for loop, we would have to have a Send rtxn.
|
||||
// TODO I don't need to do this trick if my writer is NOT async
|
||||
for (i, (name, uuid)) in indexes_references.into_iter().enumerate() {
|
||||
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
|
||||
&name, i as u32, nb_indexes,
|
||||
));
|
||||
let path = indexes_dir.join(uuid.to_string()).join("data.mdb");
|
||||
let index = index_scheduler.index_mapper.index(&rtxn, &name)?;
|
||||
let mut index_file = index.try_clone_inner_file().unwrap();
|
||||
index_file.seek(SeekFrom::Start(0))?;
|
||||
eprintln!("Appending index file for {} in {}", name, path.display());
|
||||
tarball.append_file(path, &mut index_file)?;
|
||||
}
|
||||
|
||||
drop(rtxn);
|
||||
|
||||
// 4. Snapshot the auth LMDB env
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
|
||||
let mut auth_env_file =
|
||||
index_scheduler.scheduler.auth_env.try_clone_inner_file().unwrap();
|
||||
auth_env_file.seek(SeekFrom::Start(0))?;
|
||||
let path = Path::new("auth").join("data.mdb");
|
||||
tarball.append_file(path, &mut auth_env_file)?;
|
||||
|
||||
let mut gzencoder = tarball.into_inner()?;
|
||||
gzencoder.flush()?;
|
||||
gzencoder.try_finish()?;
|
||||
let mut writer = gzencoder.finish()?;
|
||||
writer.flush()?;
|
||||
|
||||
Result::<_, Error>::Ok(())
|
||||
});
|
||||
|
||||
let (uploader_result, builder_result) = tokio::join!(uploader_task, builder_task);
|
||||
|
||||
builder_result.unwrap()?;
|
||||
uploader_result.unwrap()?;
|
||||
|
||||
for task in &mut tasks {
|
||||
task.status = Status::Succeeded;
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 23, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.23.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 23, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 23, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 23, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
@@ -37,7 +37,7 @@ catto [1,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.23.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 23, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
----------------------------------------------------------------------
|
||||
@@ -40,7 +40,7 @@ doggo [2,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.23.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 23, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -43,7 +43,7 @@ doggo [2,3,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.23.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -126,7 +126,7 @@ impl IndexScheduler {
|
||||
std::fs::create_dir_all(&options.auth_path).unwrap();
|
||||
let auth_env = open_auth_store_env(&options.auth_path).unwrap();
|
||||
let index_scheduler =
|
||||
Self::new(options, auth_env, version, sender, planned_failures).unwrap();
|
||||
Self::new(options, auth_env, version, None, sender, planned_failures).unwrap();
|
||||
|
||||
// To be 100% consistent between all test we're going to start the scheduler right now
|
||||
// and ensure it's in the expected starting state.
|
||||
|
||||
@@ -47,6 +47,7 @@ pub fn upgrade_index_scheduler(
|
||||
(1, 21, _) => 0,
|
||||
(1, 22, _) => 0,
|
||||
(1, 23, _) => 0,
|
||||
(1, 24, _) => 0,
|
||||
(major, minor, patch) => {
|
||||
if major > current_major
|
||||
|| (major == current_major && minor > current_minor)
|
||||
|
||||
@@ -216,7 +216,10 @@ enum OnFailure {
|
||||
KeepDb,
|
||||
}
|
||||
|
||||
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
pub fn setup_meilisearch(
|
||||
opt: &Opt,
|
||||
handle: tokio::runtime::Handle,
|
||||
) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
let index_scheduler_opt = IndexSchedulerOptions {
|
||||
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
|
||||
auth_path: opt.db_path.join("auth"),
|
||||
@@ -256,6 +259,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
index_scheduler_opt,
|
||||
OnFailure::RemoveDb,
|
||||
binary_version, // the db is empty
|
||||
handle,
|
||||
)?,
|
||||
Err(e) => {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
@@ -273,7 +277,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
bail!("snapshot doesn't exist at {}", snapshot_path.display())
|
||||
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
|
||||
} else {
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
|
||||
}
|
||||
} else if let Some(ref path) = opt.import_dump {
|
||||
let src_path_exists = path.exists();
|
||||
@@ -284,6 +288,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
index_scheduler_opt,
|
||||
OnFailure::RemoveDb,
|
||||
binary_version, // the db is empty
|
||||
handle,
|
||||
)?;
|
||||
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
|
||||
Ok(()) => (index_scheduler, auth_controller),
|
||||
@@ -304,10 +309,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
|
||||
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
|
||||
} else {
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
|
||||
}
|
||||
} else {
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
|
||||
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
|
||||
};
|
||||
|
||||
// We create a loop in a thread that registers snapshotCreation tasks
|
||||
@@ -338,6 +343,7 @@ fn open_or_create_database_unchecked(
|
||||
index_scheduler_opt: IndexSchedulerOptions,
|
||||
on_failure: OnFailure,
|
||||
version: (u32, u32, u32),
|
||||
handle: tokio::runtime::Handle,
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
// we don't want to create anything in the data.ms yet, thus we
|
||||
// wrap our two builders in a closure that'll be executed later.
|
||||
@@ -345,7 +351,7 @@ fn open_or_create_database_unchecked(
|
||||
let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap();
|
||||
let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key);
|
||||
let index_scheduler_builder = || -> anyhow::Result<_> {
|
||||
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
|
||||
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version, Some(handle))?)
|
||||
};
|
||||
|
||||
match (
|
||||
@@ -452,6 +458,7 @@ fn open_or_create_database(
|
||||
index_scheduler_opt: IndexSchedulerOptions,
|
||||
empty_db: bool,
|
||||
binary_version: (u32, u32, u32),
|
||||
handle: tokio::runtime::Handle,
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
let version = if !empty_db {
|
||||
check_version(opt, &index_scheduler_opt, binary_version)?
|
||||
@@ -459,7 +466,7 @@ fn open_or_create_database(
|
||||
binary_version
|
||||
};
|
||||
|
||||
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version)
|
||||
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version, handle)
|
||||
}
|
||||
|
||||
fn import_dump(
|
||||
|
||||
@@ -76,7 +76,10 @@ fn on_panic(info: &std::panic::PanicHookInfo) {
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
try_main().await.inspect_err(|error| {
|
||||
// won't panic inside of tokio::main
|
||||
let runtime = tokio::runtime::Handle::current();
|
||||
|
||||
try_main(runtime).await.inspect_err(|error| {
|
||||
tracing::error!(%error);
|
||||
let mut current = error.source();
|
||||
let mut depth = 0;
|
||||
@@ -88,7 +91,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
})
|
||||
}
|
||||
|
||||
async fn try_main() -> anyhow::Result<()> {
|
||||
async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
|
||||
let (opt, config_read_from) = Opt::try_build()?;
|
||||
|
||||
std::panic::set_hook(Box::new(on_panic));
|
||||
@@ -122,7 +125,7 @@ async fn try_main() -> anyhow::Result<()> {
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
|
||||
let (index_scheduler, auth_controller) = setup_meilisearch(&opt, runtime)?;
|
||||
|
||||
let analytics =
|
||||
analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await;
|
||||
|
||||
@@ -22,11 +22,12 @@ use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
|
||||
use crate::routes::parse_include_metadata_header;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult, SemanticRatio,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@@ -345,15 +346,20 @@ pub async fn search_with_url_query(
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let include_metadata = parse_include_metadata_header(&req);
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid.to_string(),
|
||||
SearchParams {
|
||||
index_uid: index_uid.to_string(),
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors: retrieve_vector,
|
||||
features: index_scheduler.features(),
|
||||
request_uid,
|
||||
include_metadata,
|
||||
},
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vector,
|
||||
index_scheduler.features(),
|
||||
request_uid,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
@@ -453,16 +459,21 @@ pub async fn search_with_post(
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
let include_metadata = parse_include_metadata_header(&req);
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid.to_string(),
|
||||
SearchParams {
|
||||
index_uid: index_uid.to_string(),
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors,
|
||||
features: index_scheduler.features(),
|
||||
request_uid,
|
||||
include_metadata,
|
||||
},
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
request_uid,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -235,6 +235,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
request_uid: _,
|
||||
metadata: _,
|
||||
} = result;
|
||||
|
||||
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
||||
|
||||
@@ -45,6 +45,7 @@ use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetada
|
||||
use crate::search::{
|
||||
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
|
||||
SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult,
|
||||
INCLUDE_METADATA_HEADER,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
use crate::Opt;
|
||||
@@ -184,6 +185,18 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
|
||||
.is_some_and(|s| s.to_lowercase() == "true"))
|
||||
}
|
||||
|
||||
/// Parse the `Meili-Include-Metadata` header from an HTTP request.
|
||||
///
|
||||
/// Returns `true` if the header is present and set to "true" or "1" (case-insensitive).
|
||||
/// Returns `false` if the header is not present or has any other value.
|
||||
pub fn parse_include_metadata_header(req: &HttpRequest) -> bool {
|
||||
req.headers()
|
||||
.get(INCLUDE_METADATA_HEADER)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.map(|v| matches!(v.to_lowercase().as_str(), "true" | "1"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SummarizedTaskView {
|
||||
|
||||
@@ -18,10 +18,11 @@ use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
use crate::routes::parse_include_metadata_header;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
|
||||
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
|
||||
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
||||
FederatedSearchResult, RetrieveVectors, SearchParams, SearchQueryWithIndex,
|
||||
SearchResultWithIndex, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@@ -188,6 +189,7 @@ pub async fn multi_search_with_post(
|
||||
err
|
||||
})?;
|
||||
|
||||
let include_metadata = parse_include_metadata_header(&req);
|
||||
let response = match federation {
|
||||
Some(federation) => {
|
||||
debug!(
|
||||
@@ -209,6 +211,7 @@ pub async fn multi_search_with_post(
|
||||
features,
|
||||
is_proxy,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
)
|
||||
.await;
|
||||
permit.drop().await;
|
||||
@@ -279,13 +282,16 @@ pub async fn multi_search_with_post(
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid_str.clone(),
|
||||
SearchParams {
|
||||
index_uid: index_uid_str.clone(),
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors: retrieve_vector,
|
||||
features,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
},
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vector,
|
||||
features,
|
||||
request_uid,
|
||||
)
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -22,7 +22,8 @@ use uuid::Uuid;
|
||||
use super::super::ranking_rules::{self, RankingRules};
|
||||
use super::super::{
|
||||
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, HitMaker,
|
||||
HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
|
||||
HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchMetadata, SearchQuery,
|
||||
SearchQueryWithIndex,
|
||||
};
|
||||
use super::proxy::{proxy_search, ProxySearchError, ProxySearchParams};
|
||||
use super::types::{
|
||||
@@ -41,6 +42,7 @@ pub async fn perform_federated_search(
|
||||
features: RoFeatures,
|
||||
is_proxy: bool,
|
||||
request_uid: Uuid,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, ResponseError> {
|
||||
if is_proxy {
|
||||
features.check_network("Performing a remote federated search")?;
|
||||
@@ -59,20 +61,38 @@ pub async fn perform_federated_search(
|
||||
|
||||
let network = index_scheduler.network();
|
||||
|
||||
// Preconstruct metadata keeping the original queries order for later metadata building
|
||||
let precomputed_query_metadata: Option<Vec<_>> = include_metadata.then(|| {
|
||||
queries
|
||||
.iter()
|
||||
.map(|q| {
|
||||
(
|
||||
q.index_uid.to_string(),
|
||||
q.federation_options.as_ref().and_then(|o| o.remote.clone()),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
|
||||
// this implementation partition the queries by index to guarantee an important property:
|
||||
// - all the queries to a particular index use the same read transaction.
|
||||
// This is an important property, otherwise we cannot guarantee the self-consistency of the results.
|
||||
|
||||
// 1. partition queries by host and index
|
||||
let mut partitioned_queries = PartitionedQueries::new();
|
||||
|
||||
for (query_index, federated_query) in queries.into_iter().enumerate() {
|
||||
partitioned_queries.partition(federated_query, query_index, &network, features)?
|
||||
}
|
||||
|
||||
// 2. perform queries, merge and make hits index by index
|
||||
// 2.1. start remote queries
|
||||
let remote_search =
|
||||
RemoteSearch::start(partitioned_queries.remote_queries_by_host, &federation, deadline);
|
||||
let remote_search = RemoteSearch::start(
|
||||
partitioned_queries.remote_queries_by_host,
|
||||
&federation,
|
||||
deadline,
|
||||
include_metadata,
|
||||
);
|
||||
|
||||
// 2.2. concurrently execute local queries
|
||||
let params = SearchByIndexParams {
|
||||
@@ -114,11 +134,25 @@ pub async fn perform_federated_search(
|
||||
let after_waiting_remote_results = std::time::Instant::now();
|
||||
|
||||
// 3. merge hits and metadata across indexes and hosts
|
||||
// 3.1. merge metadata
|
||||
|
||||
// 3.1. Build metadata in the same order as the original queries
|
||||
let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| {
|
||||
// If a remote is present, set the local remote name
|
||||
let local_remote_name = network.local.clone().filter(|_| partitioned_queries.has_remote);
|
||||
|
||||
build_query_metadata(
|
||||
precomputed_query_metadata,
|
||||
local_remote_name,
|
||||
&remote_results,
|
||||
&results_by_index,
|
||||
)
|
||||
});
|
||||
|
||||
// 3.2. merge federation metadata
|
||||
let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) =
|
||||
merge_metadata(&mut results_by_index, &remote_results);
|
||||
|
||||
// 3.2. merge hits
|
||||
// 3.3. merge hits
|
||||
let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
|
||||
.skip(federation.offset)
|
||||
.take(federation.limit)
|
||||
@@ -133,7 +167,7 @@ pub async fn perform_federated_search(
|
||||
.map(|hit| hit.hit())
|
||||
.collect();
|
||||
|
||||
// 3.3. merge query vectors
|
||||
// 3.4. merge query vectors
|
||||
let query_vectors = if retrieve_vectors {
|
||||
for remote_results in remote_results.iter_mut() {
|
||||
if let Some(remote_vectors) = remote_results.query_vectors.take() {
|
||||
@@ -152,7 +186,7 @@ pub async fn perform_federated_search(
|
||||
None
|
||||
};
|
||||
|
||||
// 3.4. merge facets
|
||||
// 3.5. merge facets
|
||||
let (facet_distribution, facet_stats, facets_by_index) =
|
||||
facet_order.merge(federation.merge_facets, remote_results, facets);
|
||||
|
||||
@@ -179,6 +213,7 @@ pub async fn perform_federated_search(
|
||||
facets_by_index,
|
||||
remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
|
||||
request_uid: Some(request_uid),
|
||||
metadata: query_metadata,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -402,6 +437,7 @@ struct SearchHitByIndex {
|
||||
|
||||
struct SearchResultByIndex {
|
||||
index: String,
|
||||
primary_key: Option<String>,
|
||||
hits: Vec<SearchHitByIndex>,
|
||||
estimated_total_hits: usize,
|
||||
degraded: bool,
|
||||
@@ -409,6 +445,61 @@ struct SearchResultByIndex {
|
||||
facets: Option<ComputedFacets>,
|
||||
}
|
||||
|
||||
/// Builds query metadata for federated search results.
|
||||
///
|
||||
/// This function creates metadata for each query in the same order as the original queries,
|
||||
/// combining information from both local and remote search results. It handles the mapping
|
||||
/// of primary keys to their respective indexes and remotes to prevent collisions when
|
||||
/// multiple remotes have the same index_uid but different primary keys.
|
||||
fn build_query_metadata(
|
||||
precomputed_query_metadata: Vec<(String, Option<String>)>,
|
||||
local_remote_name: Option<String>,
|
||||
remote_results: &[FederatedSearchResult],
|
||||
results_by_index: &[SearchResultByIndex],
|
||||
) -> Vec<SearchMetadata> {
|
||||
// Create a map of (remote, index_uid) -> primary_key for quick lookup
|
||||
// This prevents collisions when multiple remotes have the same index_uid but different primary keys
|
||||
let mut primary_key_per_index = std::collections::HashMap::new();
|
||||
|
||||
// Build metadata for remote results
|
||||
for remote_result in remote_results {
|
||||
if let Some(remote_metadata) = &remote_result.metadata {
|
||||
for remote_meta in remote_metadata {
|
||||
if let SearchMetadata {
|
||||
remote: Some(remote_name),
|
||||
index_uid,
|
||||
primary_key: Some(primary_key),
|
||||
..
|
||||
} = remote_meta
|
||||
{
|
||||
let key = (Some(remote_name), index_uid);
|
||||
primary_key_per_index.insert(key, primary_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build metadata for local results
|
||||
for local_meta in results_by_index {
|
||||
if let SearchResultByIndex { index, primary_key: Some(primary_key), .. } = local_meta {
|
||||
let key = (None, index);
|
||||
primary_key_per_index.insert(key, primary_key);
|
||||
}
|
||||
}
|
||||
|
||||
// Build metadata in the same order as the original queries
|
||||
let mut query_metadata = Vec::new();
|
||||
for (index_uid, remote) in precomputed_query_metadata {
|
||||
let primary_key =
|
||||
primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string());
|
||||
let query_uid = Uuid::now_v7();
|
||||
// if the remote is not set, use the local remote name
|
||||
let remote = remote.or_else(|| local_remote_name.clone());
|
||||
query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote });
|
||||
}
|
||||
query_metadata
|
||||
}
|
||||
|
||||
fn merge_metadata(
|
||||
results_by_index: &mut Vec<SearchResultByIndex>,
|
||||
remote_results: &Vec<FederatedSearchResult>,
|
||||
@@ -420,6 +511,7 @@ fn merge_metadata(
|
||||
let mut max_remote_duration = Duration::ZERO;
|
||||
for SearchResultByIndex {
|
||||
index,
|
||||
primary_key: _,
|
||||
hits: _,
|
||||
estimated_total_hits: estimated_total_hits_by_index,
|
||||
facets: facets_by_index,
|
||||
@@ -448,6 +540,7 @@ fn merge_metadata(
|
||||
degraded: degraded_for_host,
|
||||
used_negative_operator: host_used_negative_operator,
|
||||
remote_errors: _,
|
||||
metadata: _,
|
||||
request_uid: _,
|
||||
} in remote_results
|
||||
{
|
||||
@@ -576,7 +669,12 @@ struct RemoteSearch {
|
||||
}
|
||||
|
||||
impl RemoteSearch {
|
||||
fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self {
|
||||
fn start(
|
||||
queries: RemoteQueriesByHost,
|
||||
federation: &Federation,
|
||||
deadline: Instant,
|
||||
include_metadata: bool,
|
||||
) -> Self {
|
||||
let mut in_flight_remote_queries = BTreeMap::new();
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_millis(200))
|
||||
@@ -596,7 +694,10 @@ impl RemoteSearch {
|
||||
// never merge distant facets
|
||||
proxy_federation.merge_facets = None;
|
||||
let params = params.clone();
|
||||
async move { proxy_search(&node, queries, proxy_federation, ¶ms).await }
|
||||
async move {
|
||||
proxy_search(&node, queries, proxy_federation, ¶ms, include_metadata)
|
||||
.await
|
||||
}
|
||||
}),
|
||||
);
|
||||
}
|
||||
@@ -640,6 +741,13 @@ impl RemoteSearch {
|
||||
continue 'remote_queries;
|
||||
}
|
||||
|
||||
// Add remote name to metadata
|
||||
if let Some(metadata) = res.metadata.as_mut() {
|
||||
for meta in metadata {
|
||||
meta.remote = Some(node_name.clone());
|
||||
}
|
||||
}
|
||||
|
||||
federation.insert(
|
||||
FEDERATION_REMOTE.to_string(),
|
||||
serde_json::Value::String(node_name.clone()),
|
||||
@@ -735,6 +843,7 @@ impl SearchByIndex {
|
||||
}
|
||||
};
|
||||
let rtxn = index.read_txn()?;
|
||||
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
let criteria = index.criteria(&rtxn)?;
|
||||
let dictionary = index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
@@ -761,6 +870,12 @@ impl SearchByIndex {
|
||||
return Err(error);
|
||||
}
|
||||
let mut results_by_query = Vec::with_capacity(queries.len());
|
||||
|
||||
// all queries for an index share the same budget
|
||||
let time_budget = match cutoff {
|
||||
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||
None => TimeBudget::default(),
|
||||
};
|
||||
for QueryByIndex { query, weight, query_index } in queries {
|
||||
// use an immediately invoked lambda to capture the result without returning from the function
|
||||
|
||||
@@ -830,17 +945,13 @@ impl SearchByIndex {
|
||||
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
let time_budget = match cutoff {
|
||||
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||
None => TimeBudget::default(),
|
||||
};
|
||||
|
||||
let (mut search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search(
|
||||
&index,
|
||||
&rtxn,
|
||||
&query,
|
||||
&search_kind,
|
||||
time_budget,
|
||||
// clones of `TimeBudget` share the budget rather than restart it
|
||||
time_budget.clone(),
|
||||
params.features,
|
||||
)?;
|
||||
|
||||
@@ -987,6 +1098,7 @@ impl SearchByIndex {
|
||||
})?;
|
||||
self.results_by_index.push(SearchResultByIndex {
|
||||
index: index_uid,
|
||||
primary_key,
|
||||
hits: merged_result,
|
||||
estimated_total_hits,
|
||||
degraded,
|
||||
|
||||
@@ -7,7 +7,7 @@ use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::types::{FederatedSearch, FederatedSearchResult, Federation};
|
||||
use crate::search::SearchQueryWithIndex;
|
||||
use crate::search::{SearchQueryWithIndex, INCLUDE_METADATA_HEADER};
|
||||
|
||||
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
|
||||
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
|
||||
@@ -98,6 +98,7 @@ pub async fn proxy_search(
|
||||
queries: Vec<SearchQueryWithIndex>,
|
||||
federation: Federation,
|
||||
params: &ProxySearchParams,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, ProxySearchError> {
|
||||
let url = format!("{}/multi-search", node.url);
|
||||
|
||||
@@ -119,7 +120,16 @@ pub async fn proxy_search(
|
||||
};
|
||||
|
||||
for i in 0..params.try_count {
|
||||
match try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline).await {
|
||||
match try_proxy_search(
|
||||
&url,
|
||||
search_api_key,
|
||||
&federated,
|
||||
¶ms.client,
|
||||
deadline,
|
||||
include_metadata,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(response) => return Ok(response),
|
||||
Err(retry) => {
|
||||
let duration = retry.into_duration(i)?;
|
||||
@@ -127,7 +137,7 @@ pub async fn proxy_search(
|
||||
}
|
||||
}
|
||||
}
|
||||
try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline)
|
||||
try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline, include_metadata)
|
||||
.await
|
||||
.map_err(Retry::into_error)
|
||||
}
|
||||
@@ -138,6 +148,7 @@ async fn try_proxy_search(
|
||||
federated: &FederatedSearch,
|
||||
client: &Client,
|
||||
deadline: std::time::Instant,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, Retry> {
|
||||
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
|
||||
|
||||
@@ -148,6 +159,8 @@ async fn try_proxy_search(
|
||||
request
|
||||
};
|
||||
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
|
||||
let request =
|
||||
if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request };
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
|
||||
@@ -18,6 +18,8 @@ use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::search::SearchMetadata;
|
||||
|
||||
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
|
||||
use crate::milli::vector::Embedding;
|
||||
|
||||
@@ -134,6 +136,8 @@ pub struct FederatedSearchResult {
|
||||
pub facets_by_index: FederatedFacets,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub request_uid: Option<Uuid>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub metadata: Option<Vec<SearchMetadata>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub remote_errors: Option<BTreeMap<String, ResponseError>>,
|
||||
@@ -160,6 +164,7 @@ impl fmt::Debug for FederatedSearchResult {
|
||||
facets_by_index,
|
||||
remote_errors,
|
||||
request_uid,
|
||||
metadata,
|
||||
} = self;
|
||||
|
||||
let mut debug = f.debug_struct("SearchResult");
|
||||
@@ -195,6 +200,9 @@ impl fmt::Debug for FederatedSearchResult {
|
||||
if let Some(request_uid) = request_uid {
|
||||
debug.field("request_uid", &request_uid);
|
||||
}
|
||||
if let Some(metadata) = metadata {
|
||||
debug.field("metadata", &metadata);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
|
||||
@@ -57,6 +57,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
||||
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
|
||||
pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";
|
||||
|
||||
#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
@@ -836,6 +837,18 @@ pub struct SearchHit {
|
||||
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct SearchMetadata {
|
||||
pub query_uid: Uuid,
|
||||
pub index_uid: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub primary_key: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub remote: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, PartialEq, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
@@ -854,6 +867,8 @@ pub struct SearchResult {
|
||||
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub request_uid: Option<Uuid>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub metadata: Option<SearchMetadata>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub semantic_hit_count: Option<u32>,
|
||||
@@ -876,6 +891,7 @@ impl fmt::Debug for SearchResult {
|
||||
facet_distribution,
|
||||
facet_stats,
|
||||
request_uid,
|
||||
metadata,
|
||||
semantic_hit_count,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
@@ -908,6 +924,9 @@ impl fmt::Debug for SearchResult {
|
||||
if let Some(request_uid) = request_uid {
|
||||
debug.field("request_uid", &request_uid);
|
||||
}
|
||||
if let Some(metadata) = metadata {
|
||||
debug.field("metadata", &metadata);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
@@ -1120,16 +1139,28 @@ pub fn prepare_search<'t>(
|
||||
Ok((search, is_finite_pagination, max_total_hits, offset))
|
||||
}
|
||||
|
||||
pub fn perform_search(
|
||||
index_uid: String,
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
search_kind: SearchKind,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
features: RoFeatures,
|
||||
request_uid: Uuid,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
pub struct SearchParams {
|
||||
pub index_uid: String,
|
||||
pub query: SearchQuery,
|
||||
pub search_kind: SearchKind,
|
||||
pub retrieve_vectors: RetrieveVectors,
|
||||
pub features: RoFeatures,
|
||||
pub request_uid: Uuid,
|
||||
pub include_metadata: bool,
|
||||
}
|
||||
|
||||
pub fn perform_search(params: SearchParams, index: &Index) -> Result<SearchResult, ResponseError> {
|
||||
let SearchParams {
|
||||
index_uid,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors,
|
||||
features,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
} = params;
|
||||
let before_search = Instant::now();
|
||||
let index_uid_for_metadata = index_uid.clone();
|
||||
let rtxn = index.read_txn()?;
|
||||
let time_budget = match index.search_cutoff(&rtxn)? {
|
||||
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||
@@ -1150,7 +1181,20 @@ pub fn perform_search(
|
||||
query_vector,
|
||||
},
|
||||
semantic_hit_count,
|
||||
) = search_from_kind(index_uid, search_kind, search)?;
|
||||
) = search_from_kind(index_uid.clone(), search_kind, search)?;
|
||||
|
||||
let metadata = if include_metadata {
|
||||
let query_uid = Uuid::now_v7();
|
||||
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
Some(SearchMetadata {
|
||||
query_uid,
|
||||
index_uid: index_uid_for_metadata,
|
||||
primary_key,
|
||||
remote: None, // Local searches don't have a remote
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let SearchQuery {
|
||||
q,
|
||||
@@ -1233,7 +1277,6 @@ pub fn perform_search(
|
||||
.transpose()?
|
||||
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
|
||||
.unzip();
|
||||
|
||||
let result = SearchResult {
|
||||
hits: documents,
|
||||
hits_info,
|
||||
@@ -1246,6 +1289,7 @@ pub fn perform_search(
|
||||
used_negative_operator,
|
||||
semantic_hit_count,
|
||||
request_uid: Some(request_uid),
|
||||
metadata,
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
@@ -516,6 +516,18 @@ impl<State> Index<'_, State> {
|
||||
self.service.post_encoded(url, query, self.encoder).await
|
||||
}
|
||||
|
||||
pub async fn search_with_headers(
|
||||
&self,
|
||||
query: Value,
|
||||
headers: Vec<(&str, &str)>,
|
||||
) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/search", urlencode(self.uid.as_ref()));
|
||||
let body = serde_json::to_string(&query).unwrap();
|
||||
let mut all_headers = vec![("content-type", "application/json")];
|
||||
all_headers.extend(headers);
|
||||
self.service.post_str(url, body, all_headers).await
|
||||
}
|
||||
|
||||
pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query);
|
||||
self.service.get(url).await
|
||||
|
||||
@@ -49,8 +49,8 @@ impl Server<Owned> {
|
||||
}
|
||||
|
||||
let options = default_settings(dir.path());
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Server { service, _dir: Some(dir), _marker: PhantomData }
|
||||
@@ -65,7 +65,9 @@ impl Server<Owned> {
|
||||
|
||||
options.master_key = Some("MASTER_KEY".to_string());
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Server { service, _dir: Some(dir), _marker: PhantomData }
|
||||
@@ -78,7 +80,9 @@ impl Server<Owned> {
|
||||
}
|
||||
|
||||
pub async fn new_with_options(options: Opt) -> Result<Self, anyhow::Error> {
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options)?;
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, handle)?;
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Ok(Server { service, _dir: None, _marker: PhantomData })
|
||||
@@ -217,8 +221,9 @@ impl Server<Shared> {
|
||||
}
|
||||
|
||||
let options = default_settings(dir.path());
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
|
||||
let service = Service { index_scheduler, auth, api_key: None, options };
|
||||
|
||||
Server { service, _dir: Some(dir), _marker: PhantomData }
|
||||
@@ -390,6 +395,17 @@ impl<State> Server<State> {
|
||||
self.service.post("/multi-search", queries).await
|
||||
}
|
||||
|
||||
pub async fn multi_search_with_headers(
|
||||
&self,
|
||||
queries: Value,
|
||||
headers: Vec<(&str, &str)>,
|
||||
) -> (Value, StatusCode) {
|
||||
let body = serde_json::to_string(&queries).unwrap();
|
||||
let mut all_headers = vec![("content-type", "application/json")];
|
||||
all_headers.extend(headers);
|
||||
self.service.post_str("/multi-search", body, all_headers).await
|
||||
}
|
||||
|
||||
pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
|
||||
self.service.get(format!("/indexes{parameters}")).await
|
||||
}
|
||||
|
||||
387
crates/meilisearch/tests/search/metadata.rs
Normal file
387
crates/meilisearch/tests/search/metadata.rs
Normal file
@@ -0,0 +1,387 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
|
||||
use crate::common::{shared_index_with_documents, Server, DOCUMENTS};
|
||||
use crate::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_without_metadata_header() {
|
||||
let index = shared_index_with_documents().await;
|
||||
|
||||
// Test that metadata is not included by default
|
||||
index
|
||||
.search(json!({"q": "glass"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]"
|
||||
}
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_with_metadata_header() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Test with Meili-Include-Metadata header
|
||||
let (response, code) = index
|
||||
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_with_metadata_header_and_primary_key() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, Some("id")).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Test with Meili-Include-Metadata header
|
||||
let (response, code) = index
|
||||
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "450465",
|
||||
"title": "Gläss",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn multi_search_without_metadata_header() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Test multi-search without metadata header
|
||||
let (response, code) = server
|
||||
.multi_search(json!({
|
||||
"queries": [
|
||||
{"indexUid": index.uid, "q": "glass"},
|
||||
{"indexUid": index.uid, "q": "dragon"}
|
||||
]
|
||||
}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".results[0].processingTimeMs" => "[duration]", ".results[0].requestUid" => "[uuid]", ".results[1].processingTimeMs" => "[duration]", ".results[1].requestUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"indexUid": "[uuid]",
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]"
|
||||
},
|
||||
{
|
||||
"indexUid": "[uuid]",
|
||||
"hits": [
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428",
|
||||
"color": [
|
||||
"green",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "dragon",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]"
|
||||
}
|
||||
]
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn multi_search_with_metadata_header() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, Some("id")).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Test multi-search with metadata header
|
||||
let (response, code) = server
|
||||
.multi_search_with_headers(
|
||||
json!({
|
||||
"queries": [
|
||||
{"indexUid": index.uid, "q": "glass"},
|
||||
{"indexUid": index.uid, "q": "dragon"}
|
||||
]
|
||||
}),
|
||||
vec![("Meili-Include-Metadata", "true")],
|
||||
)
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".results[0].processingTimeMs" => "[duration]", ".results[0].requestUid" => "[uuid]", ".results[0].metadata.queryUid" => "[uuid]", ".results[1].processingTimeMs" => "[duration]", ".results[1].requestUid" => "[uuid]", ".results[1].metadata.queryUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"indexUid": "[uuid]",
|
||||
"hits": [
|
||||
{
|
||||
"id": "450465",
|
||||
"title": "Gläss",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
},
|
||||
{
|
||||
"indexUid": "[uuid]",
|
||||
"hits": [
|
||||
{
|
||||
"id": "166428",
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"color": [
|
||||
"green",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "dragon",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_metadata_header_false_value() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Test with header set to false
|
||||
let (response, code) = index
|
||||
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "false")])
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_metadata_uuid_format() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index
|
||||
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_metadata_consistency_across_requests() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, Some("id")).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Make multiple requests and check that metadata is consistent
|
||||
for _i in 0..3 {
|
||||
let (response, code) = index
|
||||
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "450465",
|
||||
"title": "Gläss",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
}
|
||||
@@ -11,6 +11,7 @@ mod hybrid;
|
||||
#[cfg(not(feature = "chinese-pinyin"))]
|
||||
mod locales;
|
||||
mod matching_strategy;
|
||||
mod metadata;
|
||||
mod multi;
|
||||
mod pagination;
|
||||
mod restrict_searchable;
|
||||
|
||||
@@ -43,7 +43,7 @@ async fn version_too_old() {
|
||||
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
|
||||
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
|
||||
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
|
||||
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.23.0");
|
||||
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.24.0");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
|
||||
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
|
||||
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
|
||||
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
|
||||
snapshot!(err, @"Database version 1.23.1 is higher than the Meilisearch version 1.23.0. Downgrade is not supported");
|
||||
snapshot!(err, @"Database version 1.24.1 is higher than the Meilisearch version 1.24.0. Downgrade is not supported");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.23.0"
|
||||
"upgradeTo": "v1.24.0"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.23.0"
|
||||
"upgradeTo": "v1.24.0"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.23.0"
|
||||
"upgradeTo": "v1.24.0"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.23.0"
|
||||
"upgradeTo": "v1.24.0"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.23.0"
|
||||
"upgradeTo": "v1.24.0"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.23.0"
|
||||
"upgradeTo": "v1.24.0"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.23.0"
|
||||
"upgradeTo": "v1.24.0"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.23.0"
|
||||
"upgradeTo": "v1.24.0"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -68,7 +68,7 @@ fn convert_update_files(db_path: &Path) -> anyhow::Result<()> {
|
||||
|
||||
for uuid in file_store.all_uuids().context("while retrieving uuids from file store")? {
|
||||
let uuid = uuid.context("while retrieving uuid from file store")?;
|
||||
let update_file_path = file_store.get_update_path(uuid);
|
||||
let update_file_path = file_store.update_path(uuid);
|
||||
let update_file = file_store
|
||||
.get_update(uuid)
|
||||
.with_context(|| format!("while getting update file for uuid {uuid:?}"))?;
|
||||
|
||||
@@ -34,7 +34,7 @@ grenad = { version = "0.5.0", default-features = false, features = [
|
||||
"rayon",
|
||||
"tempfile",
|
||||
] }
|
||||
heed = { version = "0.22.1-nested-rtxns", default-features = false, features = [
|
||||
heed = { version = "0.22.1-nested-rtxns-2", default-features = false, features = [
|
||||
"serde-json",
|
||||
"serde-bincode",
|
||||
] }
|
||||
@@ -90,7 +90,7 @@ rhai = { version = "1.22.2", features = [
|
||||
"sync",
|
||||
] }
|
||||
arroy = "0.6.4-nested-rtxns"
|
||||
hannoy = { version = "0.0.9-nested-rtxns", features = ["arroy"] }
|
||||
hannoy = { version = "0.0.9-nested-rtxns-2", features = ["arroy"] }
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.41"
|
||||
ureq = { version = "2.12.1", features = ["json"] }
|
||||
|
||||
@@ -425,6 +425,10 @@ impl Index {
|
||||
self.env.info().map_size
|
||||
}
|
||||
|
||||
pub fn try_clone_inner_file(&self) -> Result<File> {
|
||||
Ok(self.env.try_clone_inner_file()?)
|
||||
}
|
||||
|
||||
pub fn copy_to_file(&self, file: &mut File, option: CompactionOption) -> Result<()> {
|
||||
self.env.copy_to_file(file, option).map_err(Into::into)
|
||||
}
|
||||
|
||||
@@ -97,7 +97,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
|
||||
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
|
||||
|
||||
ranking_rules[0].start_iteration(ctx, logger, universe, query)?;
|
||||
ranking_rules[0].start_iteration(ctx, logger, universe, query, &time_budget)?;
|
||||
|
||||
let mut ranking_rule_scores: Vec<ScoreDetails> = vec![];
|
||||
|
||||
@@ -168,42 +168,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
};
|
||||
|
||||
while valid_docids.len() < max_len_to_evaluate {
|
||||
if time_budget.exceeded() {
|
||||
loop {
|
||||
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
||||
ranking_rule_scores.push(ScoreDetails::Skipped);
|
||||
|
||||
// remove candidates from the universe without adding them to result if their score is below the threshold
|
||||
let is_below_threshold =
|
||||
ranking_score_threshold.is_some_and(|ranking_score_threshold| {
|
||||
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
|
||||
current_score < ranking_score_threshold
|
||||
});
|
||||
|
||||
if is_below_threshold {
|
||||
all_candidates -= &bucket;
|
||||
all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
|
||||
} else {
|
||||
maybe_add_to_results!(bucket);
|
||||
}
|
||||
|
||||
ranking_rule_scores.pop();
|
||||
|
||||
if cur_ranking_rule_index == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
back!();
|
||||
}
|
||||
|
||||
return Ok(BucketSortOutput {
|
||||
scores: valid_scores,
|
||||
docids: valid_docids,
|
||||
all_candidates,
|
||||
degraded: true,
|
||||
});
|
||||
}
|
||||
|
||||
// The universe for this bucket is zero, so we don't need to sort
|
||||
// anything, just go back to the parent ranking rule.
|
||||
if ranking_rule_universes[cur_ranking_rule_index].is_empty()
|
||||
@@ -216,14 +180,63 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
|
||||
ctx,
|
||||
logger,
|
||||
&ranking_rule_universes[cur_ranking_rule_index],
|
||||
)?
|
||||
else {
|
||||
back!();
|
||||
continue;
|
||||
let next_bucket = if time_budget.exceeded() {
|
||||
match ranking_rules[cur_ranking_rule_index].non_blocking_next_bucket(
|
||||
ctx,
|
||||
logger,
|
||||
&ranking_rule_universes[cur_ranking_rule_index],
|
||||
)? {
|
||||
std::task::Poll::Ready(bucket) => bucket,
|
||||
std::task::Poll::Pending => {
|
||||
loop {
|
||||
let bucket =
|
||||
std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
||||
ranking_rule_scores.push(ScoreDetails::Skipped);
|
||||
|
||||
// remove candidates from the universe without adding them to result if their score is below the threshold
|
||||
let is_below_threshold =
|
||||
ranking_score_threshold.is_some_and(|ranking_score_threshold| {
|
||||
let current_score =
|
||||
ScoreDetails::global_score(ranking_rule_scores.iter());
|
||||
current_score < ranking_score_threshold
|
||||
});
|
||||
|
||||
if is_below_threshold {
|
||||
all_candidates -= &bucket;
|
||||
all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
|
||||
} else {
|
||||
maybe_add_to_results!(bucket);
|
||||
}
|
||||
|
||||
ranking_rule_scores.pop();
|
||||
|
||||
if cur_ranking_rule_index == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
back!();
|
||||
}
|
||||
|
||||
return Ok(BucketSortOutput {
|
||||
scores: valid_scores,
|
||||
docids: valid_docids,
|
||||
all_candidates,
|
||||
degraded: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
|
||||
ctx,
|
||||
logger,
|
||||
&ranking_rule_universes[cur_ranking_rule_index],
|
||||
&time_budget,
|
||||
)?
|
||||
else {
|
||||
back!();
|
||||
continue;
|
||||
};
|
||||
next_bucket
|
||||
};
|
||||
|
||||
ranking_rule_scores.push(next_bucket.score);
|
||||
@@ -275,6 +288,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
logger,
|
||||
&next_bucket.candidates,
|
||||
&next_bucket.query,
|
||||
&time_budget,
|
||||
)?;
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ use super::ranking_rules::{RankingRule, RankingRuleOutput};
|
||||
use crate::score_details::{self, ScoreDetails};
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::ExactTerm;
|
||||
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger};
|
||||
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};
|
||||
|
||||
/// A ranking rule that produces 3 disjoint buckets:
|
||||
///
|
||||
@@ -35,6 +35,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
|
||||
_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
universe: &roaring::RoaringBitmap,
|
||||
query: &QueryGraph,
|
||||
_time_budget: &TimeBudget,
|
||||
) -> Result<()> {
|
||||
self.state = State::start_iteration(ctx, universe, query)?;
|
||||
Ok(())
|
||||
@@ -46,6 +47,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
|
||||
_ctx: &mut SearchContext<'ctx>,
|
||||
_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
universe: &roaring::RoaringBitmap,
|
||||
_time_budget: &TimeBudget,
|
||||
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
||||
let state = std::mem::take(&mut self.state);
|
||||
let (state, output) = State::next(state, universe);
|
||||
|
||||
@@ -7,7 +7,7 @@ use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait
|
||||
use crate::documents::geo_sort::{fill_cache, next_bucket};
|
||||
use crate::documents::{GeoSortParameter, GeoSortStrategy};
|
||||
use crate::score_details::{self, ScoreDetails};
|
||||
use crate::{GeoPoint, Result, SearchContext, SearchLogger};
|
||||
use crate::{GeoPoint, Result, SearchContext, SearchLogger, TimeBudget};
|
||||
|
||||
pub struct GeoSort<Q: RankingRuleQueryTrait> {
|
||||
query: Option<Q>,
|
||||
@@ -84,6 +84,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
||||
_logger: &mut dyn SearchLogger<Q>,
|
||||
universe: &RoaringBitmap,
|
||||
query: &Q,
|
||||
_time_budget: &TimeBudget,
|
||||
) -> Result<()> {
|
||||
assert!(self.query.is_none());
|
||||
|
||||
@@ -110,6 +111,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
_logger: &mut dyn SearchLogger<Q>,
|
||||
universe: &RoaringBitmap,
|
||||
_time_budget: &TimeBudget,
|
||||
) -> Result<Option<RankingRuleOutput<Q>>> {
|
||||
let query = self.query.as_ref().unwrap().clone();
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||
use crate::score_details::Rank;
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::ranking_rule_graph::PathVisitor;
|
||||
use crate::{Result, TermsMatchingStrategy};
|
||||
use crate::{Result, TermsMatchingStrategy, TimeBudget};
|
||||
|
||||
pub type Words = GraphBasedRankingRule<WordsGraph>;
|
||||
impl GraphBasedRankingRule<WordsGraph> {
|
||||
@@ -135,6 +135,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
_universe: &RoaringBitmap,
|
||||
query_graph: &QueryGraph,
|
||||
_time_budget: &TimeBudget,
|
||||
) -> Result<()> {
|
||||
// the `next_max_cost` is the successor integer to the maximum cost of the paths in the graph.
|
||||
//
|
||||
@@ -217,6 +218,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
_time_budget: &TimeBudget,
|
||||
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
||||
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
|
||||
// should never happen
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
use std::task::Poll;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::logger::SearchLogger;
|
||||
use super::{QueryGraph, SearchContext};
|
||||
use crate::score_details::ScoreDetails;
|
||||
use crate::Result;
|
||||
use crate::{Result, TimeBudget};
|
||||
|
||||
/// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`]
|
||||
pub trait RankingRuleQueryTrait: Sized + Clone + 'static {}
|
||||
@@ -28,12 +30,15 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
|
||||
/// buckets using [`next_bucket`](RankingRule::next_bucket).
|
||||
///
|
||||
/// The given universe is the universe that will be given to [`next_bucket`](RankingRule::next_bucket).
|
||||
///
|
||||
/// If this function may take a long time, it should check the `time_budget` and return early if exceeded.
|
||||
fn start_iteration(
|
||||
&mut self,
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
logger: &mut dyn SearchLogger<Query>,
|
||||
universe: &RoaringBitmap,
|
||||
query: &Query,
|
||||
time_budget: &TimeBudget,
|
||||
) -> Result<()>;
|
||||
|
||||
/// Return the next bucket of this ranking rule.
|
||||
@@ -43,13 +48,31 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
|
||||
/// The universe given as argument is either:
|
||||
/// - a subset of the universe given to the previous call to [`next_bucket`](RankingRule::next_bucket); OR
|
||||
/// - the universe given to [`start_iteration`](RankingRule::start_iteration)
|
||||
///
|
||||
/// If this function may take a long time, it should check the `time_budget` and return early if exceeded.
|
||||
fn next_bucket(
|
||||
&mut self,
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
logger: &mut dyn SearchLogger<Query>,
|
||||
universe: &RoaringBitmap,
|
||||
time_budget: &TimeBudget,
|
||||
) -> Result<Option<RankingRuleOutput<Query>>>;
|
||||
|
||||
/// Return the next bucket of this ranking rule, if doing so can be done without blocking
|
||||
///
|
||||
/// Even if the time budget is exceeded, when getting the next bucket is a fast operation, this should return `true`
|
||||
/// to allow Meilisearch to collect the results.
|
||||
///
|
||||
/// Default implementation conservatively returns that it would block.
|
||||
fn non_blocking_next_bucket(
|
||||
&mut self,
|
||||
_ctx: &mut SearchContext<'ctx>,
|
||||
_logger: &mut dyn SearchLogger<Query>,
|
||||
_universe: &RoaringBitmap,
|
||||
) -> Result<Poll<RankingRuleOutput<Query>>> {
|
||||
Ok(Poll::Pending)
|
||||
}
|
||||
|
||||
/// Finish iterating over the buckets, which yields control to the parent ranking rule
|
||||
/// The next call to this ranking rule, if any, will be [`start_iteration`](RankingRule::start_iteration).
|
||||
fn end_iteration(
|
||||
|
||||
@@ -7,7 +7,7 @@ use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
|
||||
use crate::score_details::{self, ScoreDetails};
|
||||
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
||||
use crate::{FieldId, Index, Result};
|
||||
use crate::{FieldId, Index, Result, TimeBudget};
|
||||
|
||||
pub trait RankingRuleOutputIter<'ctx, Query> {
|
||||
fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>>;
|
||||
@@ -96,6 +96,7 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
|
||||
_logger: &mut dyn SearchLogger<Query>,
|
||||
parent_candidates: &RoaringBitmap,
|
||||
parent_query: &Query,
|
||||
_time_budget: &TimeBudget,
|
||||
) -> Result<()> {
|
||||
let iter: RankingRuleOutputIterWrapper<'ctx, Query> = match self.field_id {
|
||||
Some(field_id) => {
|
||||
@@ -194,6 +195,7 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
|
||||
_ctx: &mut SearchContext<'ctx>,
|
||||
_logger: &mut dyn SearchLogger<Query>,
|
||||
universe: &RoaringBitmap,
|
||||
_time_budget: &TimeBudget,
|
||||
) -> Result<Option<RankingRuleOutput<Query>>> {
|
||||
let iter = self.iter.as_mut().unwrap();
|
||||
if let Some(mut bucket) = iter.next_bucket()? {
|
||||
|
||||
@@ -3,12 +3,17 @@
|
||||
//! 2. A test that ensure the filters are affectively applied even with a cutoff of 0
|
||||
//! 3. A test that ensure the cutoff works well with the ranking scores
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use meili_snap::snapshot;
|
||||
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::update::Setting;
|
||||
use crate::vector::settings::EmbeddingSettings;
|
||||
use crate::vector::{Embedder, EmbedderOptions};
|
||||
use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget};
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
@@ -361,9 +366,8 @@ fn degraded_search_and_score_details() {
|
||||
]
|
||||
"###);
|
||||
|
||||
// After SIX loop iteration. The words ranking rule gave us a new bucket.
|
||||
// Since we reached the limit we were able to early exit without checking the typo ranking rule.
|
||||
search.time_budget(TimeBudget::max().with_stop_after(6));
|
||||
// After FIVE loop iterations. The words ranking rule gave us a new bucket.
|
||||
search.time_budget(TimeBudget::max().with_stop_after(5));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
|
||||
@@ -424,4 +428,399 @@ fn degraded_search_and_score_details() {
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
// After SIX loop iterations.
|
||||
// we finished
|
||||
search.time_budget(TimeBudget::max().with_stop_after(6));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
|
||||
IDs: [4, 1, 0, 3]
|
||||
Scores: 1.0000 0.9167 0.8333 0.6667
|
||||
Score Details:
|
||||
[
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 0,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 1,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 2,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 2,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 0,
|
||||
max_typo_count: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn degraded_search_and_score_details_vector() {
|
||||
let index = create_index();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{
|
||||
"id": 4,
|
||||
"text": "hella puppo kefir",
|
||||
"_vectors": {
|
||||
"default": [0.1, 0.1]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"text": "hella puppy kefir",
|
||||
"_vectors": {
|
||||
"default": [-0.1, 0.1]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"text": "hello",
|
||||
"_vectors": {
|
||||
"default": [0.1, -0.1]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"text": "hello puppy",
|
||||
"_vectors": {
|
||||
"default": [-0.1, -0.1]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"text": "hello puppy kefir",
|
||||
"_vectors": {
|
||||
"default": null
|
||||
}
|
||||
},
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
let mut embedders = BTreeMap::new();
|
||||
embedders.insert(
|
||||
"default".into(),
|
||||
Setting::Set(EmbeddingSettings {
|
||||
source: Setting::Set(crate::vector::settings::EmbedderSource::UserProvided),
|
||||
dimensions: Setting::Set(2),
|
||||
..Default::default()
|
||||
}),
|
||||
);
|
||||
settings.set_embedder_settings(embedders);
|
||||
settings.set_vector_store(crate::vector::VectorStoreBackend::Hannoy);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
|
||||
let embedder = Arc::new(
|
||||
Embedder::new(
|
||||
EmbedderOptions::UserProvided(crate::vector::embedder::manual::EmbedderOptions {
|
||||
dimensions: 2,
|
||||
distribution: None,
|
||||
}),
|
||||
0,
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
search.semantic("default".into(), embedder, false, Some(vec![1., -1.]), None);
|
||||
|
||||
search.limit(4);
|
||||
search.scoring_strategy(ScoringStrategy::Detailed);
|
||||
search.time_budget(TimeBudget::max());
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
|
||||
IDs: [2, 0, 3, 1]
|
||||
Scores: 1.0000 0.5000 0.5000 0.0000
|
||||
Score Details:
|
||||
[
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
1.0,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.5,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.5,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.0,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
// Do ONE loop iteration. Not much can be deduced, almost everyone matched the words first bucket.
|
||||
search.time_budget(TimeBudget::max().with_stop_after(1));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
|
||||
IDs: [0, 1, 2, 3]
|
||||
Scores: 0.5000 0.0000 0.0000 0.0000
|
||||
Score Details:
|
||||
[
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.5,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Skipped,
|
||||
],
|
||||
[
|
||||
Skipped,
|
||||
],
|
||||
[
|
||||
Skipped,
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
search.time_budget(TimeBudget::max().with_stop_after(2));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
|
||||
IDs: [0, 1, 2, 3]
|
||||
Scores: 0.5000 0.0000 0.0000 0.0000
|
||||
Score Details:
|
||||
[
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.5,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.0,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Skipped,
|
||||
],
|
||||
[
|
||||
Skipped,
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
search.time_budget(TimeBudget::max().with_stop_after(3));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
|
||||
IDs: [2, 0, 1, 3]
|
||||
Scores: 1.0000 0.5000 0.0000 0.0000
|
||||
Score Details:
|
||||
[
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
1.0,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.5,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.0,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Skipped,
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
search.time_budget(TimeBudget::max().with_stop_after(4));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
|
||||
IDs: [2, 0, 3, 1]
|
||||
Scores: 1.0000 0.5000 0.5000 0.0000
|
||||
Score Details:
|
||||
[
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
1.0,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.5,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.5,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.0,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
search.time_budget(TimeBudget::max().with_stop_after(5));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
|
||||
IDs: [2, 0, 3, 1]
|
||||
Scores: 1.0000 0.5000 0.5000 0.0000
|
||||
Score Details:
|
||||
[
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
1.0,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.5,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.5,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Vector(
|
||||
Vector {
|
||||
similarity: Some(
|
||||
0.0,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use std::iter::FromIterator;
|
||||
use std::task::Poll;
|
||||
use std::time::Instant;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
@@ -7,7 +8,7 @@ use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait
|
||||
use super::VectorStoreStats;
|
||||
use crate::score_details::{self, ScoreDetails};
|
||||
use crate::vector::{DistributionShift, Embedder, VectorStore};
|
||||
use crate::{DocumentId, Result, SearchContext, SearchLogger};
|
||||
use crate::{DocumentId, Result, SearchContext, SearchLogger, TimeBudget};
|
||||
|
||||
pub struct VectorSort<Q: RankingRuleQueryTrait> {
|
||||
query: Option<Q>,
|
||||
@@ -52,6 +53,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
&mut self,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
vector_candidates: &RoaringBitmap,
|
||||
time_budget: &TimeBudget,
|
||||
) -> Result<()> {
|
||||
let target = &self.target;
|
||||
let backend = ctx.index.get_vector_store(ctx.txn)?.unwrap_or_default();
|
||||
@@ -59,7 +61,13 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
let before = Instant::now();
|
||||
let reader =
|
||||
VectorStore::new(backend, ctx.index.vector_store, self.embedder_index, self.quantized);
|
||||
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
|
||||
let results = reader.nns_by_vector(
|
||||
ctx.txn,
|
||||
target,
|
||||
self.limit,
|
||||
Some(vector_candidates),
|
||||
time_budget,
|
||||
)?;
|
||||
self.cached_sorted_docids = results.into_iter();
|
||||
*ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {
|
||||
total_time: before.elapsed(),
|
||||
@@ -69,6 +77,20 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn next_result(&mut self, vector_candidates: &RoaringBitmap) -> Option<(DocumentId, f32)> {
|
||||
for (docid, distance) in self.cached_sorted_docids.by_ref() {
|
||||
if vector_candidates.contains(docid) {
|
||||
let score = 1.0 - distance;
|
||||
let score = self
|
||||
.distribution_shift
|
||||
.map(|distribution| distribution.shift(score))
|
||||
.unwrap_or(score);
|
||||
return Some((docid, score));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
|
||||
@@ -83,12 +105,13 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
|
||||
_logger: &mut dyn SearchLogger<Q>,
|
||||
universe: &RoaringBitmap,
|
||||
query: &Q,
|
||||
time_budget: &TimeBudget,
|
||||
) -> Result<()> {
|
||||
assert!(self.query.is_none());
|
||||
|
||||
self.query = Some(query.clone());
|
||||
let vector_candidates = &self.vector_candidates & universe;
|
||||
self.fill_buffer(ctx, &vector_candidates)?;
|
||||
self.fill_buffer(ctx, &vector_candidates, time_budget)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -99,6 +122,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
_logger: &mut dyn SearchLogger<Q>,
|
||||
universe: &RoaringBitmap,
|
||||
time_budget: &TimeBudget,
|
||||
) -> Result<Option<RankingRuleOutput<Q>>> {
|
||||
let query = self.query.as_ref().unwrap().clone();
|
||||
let vector_candidates = &self.vector_candidates & universe;
|
||||
@@ -111,24 +135,17 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
|
||||
}));
|
||||
}
|
||||
|
||||
for (docid, distance) in self.cached_sorted_docids.by_ref() {
|
||||
if vector_candidates.contains(docid) {
|
||||
let score = 1.0 - distance;
|
||||
let score = self
|
||||
.distribution_shift
|
||||
.map(|distribution| distribution.shift(score))
|
||||
.unwrap_or(score);
|
||||
return Ok(Some(RankingRuleOutput {
|
||||
query,
|
||||
candidates: RoaringBitmap::from_iter([docid]),
|
||||
score: ScoreDetails::Vector(score_details::Vector { similarity: Some(score) }),
|
||||
}));
|
||||
}
|
||||
if let Some((docid, score)) = self.next_result(&vector_candidates) {
|
||||
return Ok(Some(RankingRuleOutput {
|
||||
query,
|
||||
candidates: RoaringBitmap::from_iter([docid]),
|
||||
score: ScoreDetails::Vector(score_details::Vector { similarity: Some(score) }),
|
||||
}));
|
||||
}
|
||||
|
||||
// if we got out of this loop it means we've exhausted our cache.
|
||||
// we need to refill it and run the function again.
|
||||
self.fill_buffer(ctx, &vector_candidates)?;
|
||||
self.fill_buffer(ctx, &vector_candidates, time_budget)?;
|
||||
|
||||
// we tried filling the buffer, but it remained empty 😢
|
||||
// it means we don't actually have any document remaining in the universe with a vector.
|
||||
@@ -141,11 +158,39 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
|
||||
}));
|
||||
}
|
||||
|
||||
self.next_bucket(ctx, _logger, universe)
|
||||
self.next_bucket(ctx, _logger, universe, time_budget)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
|
||||
fn end_iteration(&mut self, _ctx: &mut SearchContext<'ctx>, _logger: &mut dyn SearchLogger<Q>) {
|
||||
self.query = None;
|
||||
}
|
||||
|
||||
fn non_blocking_next_bucket(
|
||||
&mut self,
|
||||
_ctx: &mut SearchContext<'ctx>,
|
||||
_logger: &mut dyn SearchLogger<Q>,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<Poll<RankingRuleOutput<Q>>> {
|
||||
let query = self.query.as_ref().unwrap().clone();
|
||||
let vector_candidates = &self.vector_candidates & universe;
|
||||
|
||||
if vector_candidates.is_empty() {
|
||||
return Ok(Poll::Ready(RankingRuleOutput {
|
||||
query,
|
||||
candidates: universe.clone(),
|
||||
score: ScoreDetails::Vector(score_details::Vector { similarity: None }),
|
||||
}));
|
||||
}
|
||||
|
||||
if let Some((docid, score)) = self.next_result(&vector_candidates) {
|
||||
Ok(Poll::Ready(RankingRuleOutput {
|
||||
query,
|
||||
candidates: RoaringBitmap::from_iter([docid]),
|
||||
score: ScoreDetails::Vector(score_details::Vector { similarity: Some(score) }),
|
||||
}))
|
||||
} else {
|
||||
Ok(Poll::Pending)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
|
||||
&ToTargetNoOp { target: (1, 21, 0) },
|
||||
&ToTargetNoOp { target: (1, 22, 0) },
|
||||
&ToTargetNoOp { target: (1, 23, 0) },
|
||||
&ToTargetNoOp { target: (1, 24, 0) },
|
||||
// This is the last upgrade function, it will be called when the index is up to date.
|
||||
// any other upgrade function should be added before this one.
|
||||
&ToCurrentNoOp {},
|
||||
@@ -75,6 +76,7 @@ const fn start(from: (u32, u32, u32)) -> Option<usize> {
|
||||
(1, 21, _) => function_index!(11),
|
||||
(1, 22, _) => function_index!(12),
|
||||
(1, 23, _) => function_index!(13),
|
||||
(1, 24, _) => function_index!(14),
|
||||
// We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here to force manually
|
||||
// considering dumpless upgrade.
|
||||
(_major, _minor, _patch) => return None,
|
||||
|
||||
@@ -8,6 +8,7 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::progress::Progress;
|
||||
use crate::vector::Embeddings;
|
||||
use crate::TimeBudget;
|
||||
|
||||
const HANNOY_EF_CONSTRUCTION: usize = 125;
|
||||
const HANNOY_M: usize = 16;
|
||||
@@ -591,6 +592,7 @@ impl VectorStore {
|
||||
vector: &[f32],
|
||||
limit: usize,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
time_budget: &TimeBudget,
|
||||
) -> crate::Result<Vec<(ItemId, f32)>> {
|
||||
if self.backend == VectorStoreBackend::Arroy {
|
||||
if self.quantized {
|
||||
@@ -601,11 +603,25 @@ impl VectorStore {
|
||||
.map_err(Into::into)
|
||||
}
|
||||
} else if self.quantized {
|
||||
self._hannoy_nns_by_vector(rtxn, self._hannoy_quantized_db(), vector, limit, filter)
|
||||
.map_err(Into::into)
|
||||
self._hannoy_nns_by_vector(
|
||||
rtxn,
|
||||
self._hannoy_quantized_db(),
|
||||
vector,
|
||||
limit,
|
||||
filter,
|
||||
time_budget,
|
||||
)
|
||||
.map_err(Into::into)
|
||||
} else {
|
||||
self._hannoy_nns_by_vector(rtxn, self._hannoy_angular_db(), vector, limit, filter)
|
||||
.map_err(Into::into)
|
||||
self._hannoy_nns_by_vector(
|
||||
rtxn,
|
||||
self._hannoy_angular_db(),
|
||||
vector,
|
||||
limit,
|
||||
filter,
|
||||
time_budget,
|
||||
)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
}
|
||||
pub fn item_vectors(&self, rtxn: &RoTxn, item_id: u32) -> crate::Result<Vec<Vec<f32>>> {
|
||||
@@ -1000,6 +1016,7 @@ impl VectorStore {
|
||||
vector: &[f32],
|
||||
limit: usize,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
time_budget: &TimeBudget,
|
||||
) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
@@ -1011,7 +1028,10 @@ impl VectorStore {
|
||||
searcher.candidates(filter);
|
||||
}
|
||||
|
||||
results.append(&mut searcher.by_vector(rtxn, vector)?);
|
||||
let (res, _degraded) =
|
||||
&mut searcher
|
||||
.by_vector_with_cancellation(rtxn, vector, || time_budget.exceeded())?;
|
||||
results.append(res);
|
||||
}
|
||||
|
||||
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
||||
|
||||
@@ -20,14 +20,16 @@ These make us iterate fast before stabilizing it for the current release.
|
||||
|
||||
### Release steps
|
||||
|
||||
The prototype name must follow this convention: `prototype-X-Y` where
|
||||
- `X` is the feature name formatted in `kebab-case`. It should not end with a single number.
|
||||
The prototype name must follow this convention: `prototype-v<version>.<name>-<number>` where
|
||||
- `version` is the version of Meilisearch on which the prototype is based.
|
||||
- `name` is the feature name formatted in `kebab-case`. It should not end with a single number.
|
||||
- `Y` is the version of the prototype, starting from `0`.
|
||||
|
||||
✅ Example: `prototype-auto-resize-0`. </br>
|
||||
❌ Bad example: `auto-resize-0`: lacks the `prototype` prefix. </br>
|
||||
❌ Bad example: `prototype-auto-resize`: lacks the version suffix. </br>
|
||||
❌ Bad example: `prototype-auto-resize-0-0`: feature name ends with a single number.
|
||||
✅ Example: `prototype-v1.23.0.search-personalization-0`. </br>
|
||||
❌ Bad example: `prototype-search-personalization-0`: version is missing.</br>
|
||||
❌ Bad example: `v1.23.0.auto-resize-0`: lacks the `prototype` prefix. </br>
|
||||
❌ Bad example: `prototype-v1.23.0.auto-resize`: lacks the version suffix. </br>
|
||||
❌ Bad example: `prototype-v1.23.0.auto-resize-0-0`: feature name ends with a single number.
|
||||
|
||||
Steps to create a prototype:
|
||||
|
||||
|
||||
Reference in New Issue
Block a user