Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-11-27 08:12:36 +00:00

Compare commits: v1.12.0-rc...prototype-
242 commits (SHA1 only):

cd58a71f57, e0f446e4d3, 3bbad823e0, b605549bf2, 6a1062edf5, 426ea5aa97, e20b91210d, 17478301ab, 968c9dff27, 463553988c,
c321fdb9c0, 36b6e94b29, 34dea863e5, ad9d8e10f2, f7f35ef37c, c575d2693b, 024e06f7e3, 145fa3a8ff, d3a7e10348, 1c78447226,
c55891f73b, 40f8c0d840, 34d8c1a903, 3c9483b6e0, 8c789b3c7a, 3403eae9ee, 11458eefd9, 289eb92bef, cea0c89212, 1cadab9ad8,
6383f8f19e, 8a9f952bda, a5c44b4d79, 8c35744848, c0d414fc3c, b56358f606, b84c0a5390, ce621e447e, aee74f47aa, be2717edbd,
c66841626e, d0bc8c755a, 031abfd281, 27169bc7b4, 181a01f8d8, 1d153c1867, 5fde2a3ee1, 4465a1a3c9, e342ae1b46, dcb4c49cf2,
e83c021755, 7ec7200378, 6a577254fa, fd88c834c3, b4005593f4, 8ee3793259, 3648abbfd5, 4d2433de12, 28cc6df7a3, 7b14cb10a1,
34f4602ae8, 12e21a177b, 7a9290aaae, 5d219587b8, 6e9aa49893, 6b3a2c7281, 5908aec6cb, 19f48c15fb, 47b484c07c, 7d5e28b475,
0648e06aa2, 33921747b7, 970a489dcc, ba11121cfc, acdd5aa6ea, 2f3cc8cdd2, 7a95fed23f, 961de4d34e, 18ce95dcbf, c177210b1b,
1fc90fbacb, 6c72559457, 1fdfa3f208, 0d0c18f519, d12364c1e0, 8cd3a1aa57, 08fd026ebd, 75d5cea624, ab9213fa94, 45d5d4bf40,
fa885e75b4, 29fc77ee5b, ad4dc70720, 5d682b4700, f1beb60204, 85577e70cd, c5536c37b5, 9245c89cfe, eaabc1af2f, 04a24a9239,
1f54dfa883, 786b0fabea, 26733c705d, ab75f53efd, 867e6a8f1d, 6f4823fc97, df9b68f8ed, 5bc6391700, eaa897d983, bfca54cc2c,
04a62d2b97, 8c19cb0a0b, 5c492031d9, fb1caa4724, 5622b9607d, 01bcc601be, 93fbdc06d3, 69c931334f, d683f5980c, f8ba112f66,
c614d0dd35, 479607e5dd, bb00e70087, e974be9518, aeb6b74725, a751972c57, 6b269795d2, d075be798a, 89637bcaaf, 866ac91be3,
e610af36aa, 7cf6707ed3, 1995040846, 07f42e8057, 71f59749dc, 3b0b9967f6, 123b54a178, f5dd8dfc3e, bcfed70888, 503ef3bbc9,
08f2c696b0, b75f1f4c17, 95ed079761, 4a082683df, 26be5e0733, bd5110a2fe, fa8b9acdf6, 2b74d1824b, c77b00d3ac, c77073efcc,
1537323eb9, a0a3b55700, 214b51de87, 95975944d7, 9a9383643f, cac355bfa7, 9020a50df8, 52843123d4, 6298db5bea, a003a0934a,
3a11e39c01, 5f896b1050, d0c4e6da6b, 2da5584bb5, b7eb802ae6, 2e32d0474c, cb99ac6f7e, be411435f5, 29ef164530, 739c52a3cd,
7a2af06b1e, cb0c3a5aad, 8388698993, cbcf6c9ba3, bf742d81cf, 7458f0386c, fc1df5793c, 3ded069042, 261d2ceb06, 1a17e2e572,
5b8cd68abe, 5ce9acb0b9, 953a82ca04, 54341c2e80, 96831ed9bb, 0459b1a242, 8ecb726683, 297e72e262, 0ad2f57a92, 71d53f413f,
054622bd16, e905a72d73, 2e879c1df8, d040aff101, 5e30731cad, beeb31ce41, 057143214d, 6a1d26a60c, d78f4666a0, a439fa3e1a,
767259be7e, e9f34fb4b1, d5c07ef7b3, 5e218f3f4d, bcab61ab1d, 263c5a348e, be7d2fbe63, f7f9a131e4, 5df5eb2db2, 30eb0e5b5b,
5b860cb989, 76d0623b11, db4eaf4d2d, 13f21206a6, 14ee7aa84c, 8a35cd1743, 8d33af1dff, 3c7ac093d3, d49d127863, b57dd5c58e,
90b428a8c3, 096a28656e, 3dc87f5baa, cc4bd54669, 5383f41bba, 58eab9a018, 9f36ffcbdb, 68c4717e21, 5c488e20cc, da650f834e,
e83534a430, 98d4a2909e, a514ce472a, cc63802115, acec45ad7c, 08d6413365, 70802eb7c7, 6ac5b3b136, e1e76f39d0, 2094ce8a9a,
8442db8101, 79671c9faa
Cargo.lock (generated, 129 changes)
```diff
@@ -489,9 +489,14 @@ version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"

+[[package]]
+name = "bbqueue"
+version = "0.5.1"
+source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2443e06ba0d6d4"
+
 [[package]]
 name = "benchmarks"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "anyhow",
  "bumpalo",
@@ -684,7 +689,7 @@ dependencies = [

 [[package]]
 name = "build-info"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "anyhow",
  "time",
@@ -701,6 +706,20 @@ dependencies = [
  "serde",
 ]

+[[package]]
+name = "bumparaw-collections"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ce682bdc86c2e25ef5cd95881d9d6a1902214eddf74cf9ffea88fe1464377e8"
+dependencies = [
+ "allocator-api2",
+ "bitpacking",
+ "bumpalo",
+ "hashbrown 0.15.1",
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "byte-unit"
 version = "5.1.4"
@@ -1246,19 +1265,6 @@ dependencies = [
  "itertools 0.10.5",
 ]

-[[package]]
-name = "crossbeam"
-version = "0.8.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
-dependencies = [
- "crossbeam-channel",
- "crossbeam-deque",
- "crossbeam-epoch",
- "crossbeam-queue",
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.13"
@@ -1658,7 +1664,7 @@ dependencies = [

 [[package]]
 name = "dump"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1870,7 +1876,7 @@ checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4"

 [[package]]
 name = "file-store"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "tempfile",
  "thiserror",
@@ -1892,7 +1898,7 @@ dependencies = [

 [[package]]
 name = "filter-parser"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "insta",
  "nom",
@@ -1912,12 +1918,21 @@ dependencies = [

 [[package]]
 name = "flatten-serde-json"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "criterion",
  "serde_json",
 ]

+[[package]]
+name = "flume"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
+dependencies = [
+ "spin",
+]
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -2042,7 +2057,7 @@ dependencies = [

 [[package]]
 name = "fuzzers"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "arbitrary",
  "bumpalo",
@@ -2609,14 +2624,16 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"

 [[package]]
 name = "index-scheduler"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "anyhow",
  "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "big_s",
  "bincode",
  "bumpalo",
- "crossbeam",
+ "bumparaw-collections",
+ "convert_case 0.6.0",
+ "crossbeam-channel",
  "csv",
  "derive_builder 0.20.0",
  "dump",
@@ -2630,7 +2647,6 @@ dependencies = [
  "meilisearch-types",
  "memmap2",
  "page_size",
- "raw-collections",
  "rayon",
  "roaring",
  "serde",
@@ -2646,12 +2662,12 @@ dependencies = [

 [[package]]
 name = "indexmap"
-version = "2.2.6"
+version = "2.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
+checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f"
 dependencies = [
  "equivalent",
- "hashbrown 0.14.3",
+ "hashbrown 0.15.1",
  "serde",
 ]

@@ -2806,7 +2822,7 @@ dependencies = [

 [[package]]
 name = "json-depth-checker"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "criterion",
  "serde_json",
@@ -3425,7 +3441,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "meili-snap"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "insta",
  "md5",
@@ -3434,7 +3450,7 @@ dependencies = [

 [[package]]
 name = "meilisearch"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -3524,7 +3540,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-auth"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "base64 0.22.1",
  "enum-iterator",
@@ -3543,11 +3559,12 @@ dependencies = [

 [[package]]
 name = "meilisearch-types"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "actix-web",
  "anyhow",
  "bumpalo",
+ "bumparaw-collections",
  "convert_case 0.6.0",
  "csv",
  "deserr",
@@ -3560,8 +3577,8 @@ dependencies = [
  "meili-snap",
  "memmap2",
  "milli",
- "raw-collections",
  "roaring",
+ "rustc-hash 2.1.0",
  "serde",
  "serde-cs",
  "serde_json",
@@ -3575,16 +3592,19 @@ dependencies = [

 [[package]]
 name = "meilitool"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "anyhow",
+ "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)",
  "clap",
  "dump",
  "file-store",
+ "indexmap",
  "meilisearch-auth",
  "meilisearch-types",
  "serde",
  "serde_json",
+ "tempfile",
  "time",
  "uuid",
 ]
@@ -3607,15 +3627,17 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "allocator-api2",
  "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bbqueue",
  "big_s",
  "bimap",
  "bincode",
  "bstr",
  "bumpalo",
+ "bumparaw-collections",
  "bytemuck",
  "byteorder",
  "candle-core",
@@ -3630,6 +3652,7 @@ dependencies = [
  "enum-iterator",
  "filter-parser",
  "flatten-serde-json",
+ "flume",
  "fst",
  "fxhash",
  "geoutils",
@@ -3653,13 +3676,12 @@ dependencies = [
  "once_cell",
  "ordered-float",
  "rand",
- "raw-collections",
  "rayon",
  "rayon-par-bridge",
  "rhai",
  "roaring",
  "rstar",
- "rustc-hash 2.0.0",
+ "rustc-hash 2.1.0",
  "serde",
  "serde_json",
  "slice-group-by",
@@ -4061,7 +4083,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

 [[package]]
 name = "permissive-json-pointer"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "big_s",
  "serde_json",
@@ -4408,7 +4430,7 @@ dependencies = [
  "bytes",
  "rand",
  "ring",
- "rustc-hash 2.0.0",
+ "rustc-hash 2.1.0",
  "rustls",
  "slab",
  "thiserror",
@@ -4484,19 +4506,6 @@ dependencies = [
  "rand",
 ]

-[[package]]
-name = "raw-collections"
-version = "0.1.0"
-source = "git+https://github.com/meilisearch/raw-collections.git#15e5d7bdebc0c149b2a28b2454f307c717d07f8a"
-dependencies = [
- "allocator-api2",
- "bitpacking",
- "bumpalo",
- "hashbrown 0.15.1",
- "serde",
- "serde_json",
-]
-
 [[package]]
 name = "raw-cpuid"
 version = "10.7.0"
@@ -4743,8 +4752,9 @@ dependencies = [

 [[package]]
 name = "roaring"
-version = "0.10.6"
-source = "git+https://github.com/RoaringBitmap/roaring-rs?branch=clone-iter-slice#8ff028e484fb6192a0acf5a669eaf18c30cada6e"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f81dc953b2244ddd5e7860cb0bb2a790494b898ef321d4aff8e260efab60cc88"
 dependencies = [
  "bytemuck",
  "byteorder",
@@ -4793,9 +4803,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"

 [[package]]
 name = "rustc-hash"
-version = "2.0.0"
+version = "2.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
+checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497"

 [[package]]
 name = "rustc_version"
@@ -4964,9 +4974,9 @@ dependencies = [

 [[package]]
 name = "serde_json"
-version = "1.0.132"
+version = "1.0.133"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03"
+checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377"
 dependencies = [
  "indexmap",
  "itoa",
@@ -5186,6 +5196,9 @@ name = "spin"
 version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
+dependencies = [
+ "lock_api",
+]

 [[package]]
 name = "spm_precompiled"
@@ -6473,7 +6486,7 @@ dependencies = [

 [[package]]
 name = "xtask"
-version = "1.12.0"
+version = "1.12.7"
 dependencies = [
  "anyhow",
  "build-info",
```
```diff
@@ -22,7 +22,7 @@ members = [
 ]

 [workspace.package]
-version = "1.12.0"
+version = "1.12.7"
 authors = [
     "Quentin de Quelen <quentin@dequelen.me>",
     "Clément Renault <clement@meilisearch.com>",
@@ -43,6 +43,3 @@ opt-level = 3
 opt-level = 3
 [profile.dev.package.roaring]
 opt-level = 3
-
-[patch.crates-io]
-roaring = { git = "https://github.com/RoaringBitmap/roaring-rs", branch = "clone-iter-slice" }
```
```diff
@@ -24,7 +24,7 @@ tempfile = "3.14.0"
 criterion = { version = "0.5.1", features = ["html_reports"] }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
-roaring = "0.10.6"
+roaring = "0.10.7"

 [build-dependencies]
 anyhow = "1.0.86"
```
```diff
@@ -8,6 +8,7 @@ use bumpalo::Bump;
 use criterion::{criterion_group, criterion_main, Criterion};
 use milli::documents::PrimaryKey;
 use milli::heed::{EnvOpenOptions, RwTxn};
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -16,6 +17,7 @@ use rand::seq::SliceRandom;
 use rand_chacha::rand_core::SeedableRng;
 use roaring::RoaringBitmap;

+#[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

@@ -150,13 +152,14 @@ fn indexing_songs_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -164,7 +167,7 @@ fn indexing_songs_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -216,13 +219,14 @@ fn reindexing_songs_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -230,7 +234,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -260,13 +264,14 @@ fn reindexing_songs_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -274,7 +279,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -328,13 +333,14 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -342,7 +348,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -404,13 +410,14 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -418,7 +425,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -448,13 +455,14 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -462,7 +470,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -488,13 +496,14 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -502,7 +511,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -555,13 +564,14 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -569,7 +579,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -621,13 +631,14 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -635,7 +646,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -687,13 +698,14 @@ fn indexing_wiki(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -701,7 +713,7 @@ fn indexing_wiki(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -752,13 +764,14 @@ fn reindexing_wiki(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -766,7 +779,7 @@ fn reindexing_wiki(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -796,13 +809,14 @@ fn reindexing_wiki(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -810,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -863,13 +877,14 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -877,7 +892,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -939,13 +954,14 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -953,7 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -984,13 +1000,14 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -998,7 +1015,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1025,13 +1042,14 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1039,7 +1057,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1091,13 +1109,14 @@ fn indexing_movies_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1105,7 +1124,7 @@ fn indexing_movies_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1156,13 +1175,14 @@ fn reindexing_movies_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1170,7 +1190,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1200,13 +1220,14 @@ fn reindexing_movies_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1214,7 +1235,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1267,13 +1288,14 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1281,7 +1303,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1321,6 +1343,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1328,7 +1351,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1378,13 +1401,14 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1392,7 +1416,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1422,13 +1446,14 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1436,7 +1461,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1462,13 +1487,14 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1476,7 +1502,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1551,13 +1577,14 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1565,7 +1592,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1641,13 +1668,14 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1655,7 +1683,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1723,13 +1751,14 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1737,7 +1766,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1789,13 +1818,14 @@ fn indexing_geo(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1803,7 +1833,7 @@ fn indexing_geo(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1854,13 +1884,14 @@ fn reindexing_geo(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1868,7 +1899,7 @@ fn reindexing_geo(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1898,13 +1929,14 @@ fn reindexing_geo(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1912,7 +1944,7 @@ fn reindexing_geo(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1965,13 +1997,14 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -1979,7 +2012,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

```
```diff
@@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
 use milli::update::Settings;
 use utils::Conf;

+#[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

```
```diff
@@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
 use milli::update::Settings;
 use utils::Conf;

+#[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

```
```diff
@@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
 use milli::update::Settings;
 use utils::Conf;

+#[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

```
```diff
@@ -10,6 +10,7 @@ use bumpalo::Bump;
 use criterion::BenchmarkId;
 use memmap2::Mmap;
 use milli::heed::EnvOpenOptions;
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -110,13 +111,14 @@ pub fn base_setup(conf: &Conf) -> Index {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -124,7 +126,7 @@ pub fn base_setup(conf: &Conf) -> Index {
         &document_changes,
         EmbeddingConfigs::default(),
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

```
```diff
@@ -17,7 +17,7 @@ http = "1.1.0"
 meilisearch-types = { path = "../meilisearch-types" }
 once_cell = "1.19.0"
 regex = "1.10.5"
-roaring = { version = "0.10.6", features = ["serde"] }
+roaring = { version = "0.10.7", features = ["serde"] }
 serde = { version = "1.0.204", features = ["derive"] }
 serde_json = { version = "1.0.120", features = ["preserve_order"] }
 tar = "0.4.41"
```
```diff
@@ -136,6 +136,14 @@ pub struct File {
 }

 impl File {
+    pub fn from_parts(path: PathBuf, file: Option<NamedTempFile>) -> Self {
+        Self { path, file }
+    }
+
+    pub fn into_parts(self) -> (PathBuf, Option<NamedTempFile>) {
+        (self.path, self.file)
+    }
+
     pub fn dry_file() -> Result<Self> {
         Ok(Self { path: PathBuf::new(), file: None })
     }
```
```diff
@@ -10,6 +10,7 @@ use either::Either;
 use fuzzers::Operation;
 use milli::documents::mmap_from_objects;
 use milli::heed::EnvOpenOptions;
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig};
 use milli::vector::EmbeddingConfigs;
@@ -128,13 +129,14 @@ fn main() {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

     indexer::index(
         &mut wtxn,
         &index,
+        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         indexer_config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -142,7 +144,7 @@ fn main() {
         &document_changes,
         embedders,
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

```
```diff
@@ -13,6 +13,9 @@ license.workspace = true
 [dependencies]
 anyhow = "1.0.86"
 bincode = "1.3.3"
+bumpalo = "3.16.0"
+bumparaw-collections = "0.1.2"
+convert_case = "0.6.0"
 csv = "1.3.0"
 derive_builder = "0.20.0"
 dump = { path = "../dump" }
@@ -21,16 +24,15 @@ file-store = { path = "../file-store" }
 flate2 = "1.0.30"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
+memmap2 = "0.9.4"
 page_size = "0.6.0"
-raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
 rayon = "1.10.0"
-roaring = { version = "0.10.6", features = ["serde"] }
+roaring = { version = "0.10.7", features = ["serde"] }
 serde = { version = "1.0.204", features = ["derive"] }
 serde_json = { version = "1.0.120", features = ["preserve_order"] }
 synchronoise = "1.0.1"
 tempfile = "3.10.1"
 thiserror = "1.0.61"
-memmap2 = "0.9.4"
 time = { version = "0.3.36", features = [
     "serde-well-known",
     "formatting",
@@ -40,12 +42,11 @@ time = { version = "0.3.36", features = [
 tracing = "0.1.40"
 ureq = "2.10.0"
 uuid = { version = "1.10.0", features = ["serde", "v4"] }
-bumpalo = "3.16.0"

 [dev-dependencies]
 arroy = "0.5.0"
 big_s = "1.0.2"
-crossbeam = "0.8.4"
+crossbeam-channel = "0.5.13"
 insta = { version = "1.39.0", features = ["json", "redactions"] }
 maplit = "1.0.2"
 meili-snap = { path = "../meili-snap" }
```
@@ -115,13 +115,6 @@ pub enum BatchKind {
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
SettingsAndDocumentOperation {
|
||||
settings_ids: Vec<TaskId>,
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<String>,
|
||||
operation_ids: Vec<TaskId>,
|
||||
},
|
||||
Settings {
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
@@ -146,7 +139,6 @@ impl BatchKind {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { allow_index_creation, .. }
|
||||
| BatchKind::ClearAndSettings { allow_index_creation, .. }
|
||||
| BatchKind::SettingsAndDocumentOperation { allow_index_creation, .. }
|
||||
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
@@ -154,10 +146,7 @@ impl BatchKind {
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { primary_key, .. }
|
||||
| BatchKind::SettingsAndDocumentOperation { primary_key, .. } => {
|
||||
Some(primary_key.as_deref())
|
||||
}
|
||||
BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -275,8 +264,7 @@ impl BatchKind {
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }
|
||||
| BatchKind::SettingsAndDocumentOperation { operation_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
|
||||
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
@@ -356,15 +344,9 @@ impl BatchKind {
|
||||
) => Break(this),
|
||||
|
||||
(
|
||||
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, operation_ids },
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::Settings { .. },
|
||||
) => Continue(BatchKind::SettingsAndDocumentOperation {
|
||||
settings_ids: vec![id],
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids,
|
||||
}),
|
||||
) => Break(this),
|
||||
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
|
||||
deletion_ids.push(id);
|
||||
@@ -477,63 +459,7 @@ impl BatchKind {
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::SettingsAndDocumentOperation { settings_ids, method: _, mut operation_ids, allow_index_creation, primary_key: _ },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
settings_ids,
|
||||
other: operation_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::SettingsAndDocumentOperation { settings_ids, method: ReplaceDocuments, mut operation_ids, allow_index_creation, primary_key: _},
|
||||
K::DocumentImport { method: ReplaceDocuments, primary_key: pk2, .. },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::SettingsAndDocumentOperation {
|
||||
settings_ids,
|
||||
method: ReplaceDocuments,
|
||||
allow_index_creation,
|
||||
primary_key: pk2,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::SettingsAndDocumentOperation { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
|
||||
K::DocumentImport { method: UpdateDocuments, primary_key: pk2, .. },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::SettingsAndDocumentOperation {
|
||||
settings_ids,
|
||||
method: UpdateDocuments,
|
||||
allow_index_creation,
|
||||
primary_key: pk2,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
// But we can't batch a settings and a doc op with another doc op
|
||||
// this MUST be AFTER the two previous branch
|
||||
(
|
||||
this @ BatchKind::SettingsAndDocumentOperation { .. },
|
||||
K::DocumentDeletion { .. } | K::DocumentImport { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::SettingsAndDocumentOperation {
|
||||
settings_ids,
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::IndexCreation { .. }
|
||||
| BatchKind::IndexDeletion { .. }
|
||||
@@ -808,30 +734,30 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn document_addition_batch_with_settings() {
|
||||
fn document_addition_doesnt_batch_with_settings() {
|
||||
// simple case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// multiple settings and doc addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
|
||||
// addition and setting unordered
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// We ensure this kind of batch doesn't batch with forbidden operations
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
// Doesn't batch with other forbidden operations
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
}

#[test]
@@ -859,8 +785,8 @@ mod tests {
debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");

debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
}

#[test]
@@ -907,50 +833,6 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");

// Then the mixed cases.
// The index already exists; whatever rights the tasks have, it shouldn't change the result.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");

// When the index doesn't exist yet, it's more complicated.
// Either the first task we encounter creates it, in which case we can create a big batch with everything.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
// The rights of the following tasks aren't really important.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
// Or, the second case: the first task doesn't create the index, and thus we want to batch it only with tasks that can't create an index.
// That can be a second task that doesn't have the right to create an index, or anything else that can't create an index, like an index deletion, document deletion, document clear, etc.
// All these tasks are going to throw an error `Index doesn't exist` once the batch is processed.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
// The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch with what
// follows because we first need to process the erroneous batch.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
}
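
The three cases above reduce to one rule: once the index is missing, the first task's `allow_index_creation` right decides what may join its batch. A minimal sketch of that rule, with hypothetical names, not the autobatcher's actual code:

// Hypothetical condensation of the batching rule exercised by the tests above.
fn may_join_batch(index_exists: bool, first_allows_creation: bool, task_allows_creation: bool) -> bool {
    if index_exists {
        true // rights don't change the result when the index already exists
    } else if first_allows_creation {
        true // the first task creates the index, so everything can batch with it
    } else {
        // Only tasks that also cannot create the index may join; the first task
        // that could create it forces the erroneous batch to be processed alone.
        !task_allows_creation
    }
}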

#[test]
@@ -959,13 +841,13 @@ mod tests {
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");

debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");

// batch deletion and addition

File diff suppressed because it is too large
@@ -104,7 +104,7 @@ pub enum Error {
)]
InvalidTaskCanceledBy { canceled_by: String },
#[error(
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 400 bytes."
)]
InvalidIndexUid { index_uid: String },
#[error("Task `{0}` not found.")]
@@ -122,8 +122,11 @@ pub enum Error {
Dump(#[from] dump::Error),
#[error(transparent)]
Heed(#[from] heed::Error),
#[error(transparent)]
Milli(#[from] milli::Error),
#[error("{}", match .index_uid {
Some(uid) if !uid.is_empty() => format!("Index `{}`: {error}", uid),
_ => format!("{error}")
})]
Milli { error: milli::Error, index_uid: Option<String> },
#[error("An unexpected crash occurred when processing the task.")]
ProcessBatchPanicked,
#[error(transparent)]
@@ -190,7 +193,7 @@ impl Error {
| Error::AbortedTask
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli(_)
| Error::Milli { .. }
| Error::ProcessBatchPanicked
| Error::FileStore(_)
| Error::IoError(_)
@@ -209,6 +212,20 @@ impl Error {
pub fn with_custom_error_code(self, code: Code) -> Self {
Self::WithCustomErrorCode(code, Box::new(self))
}

pub fn from_milli(err: milli::Error, index_uid: Option<String>) -> Self {
match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Self::Milli { error: err, index_uid }
.with_custom_error_code(Code::InvalidDocumentFilter)
}
milli::Error::UserError(milli::UserError::InvalidFilterExpression { .. }) => {
Self::Milli { error: err, index_uid }
.with_custom_error_code(Code::InvalidDocumentFilter)
}
_ => Self::Milli { error: err, index_uid },
}
}
}
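
Both filter arms above map to the same custom error code, so they could be collapsed with an or-pattern; a behavior-preserving sketch, not the committed form:

// Equivalent, more compact match (illustration only):
match err {
    milli::Error::UserError(
        milli::UserError::InvalidFilter(_)
        | milli::UserError::InvalidFilterExpression { .. },
    ) => Self::Milli { error: err, index_uid }
        .with_custom_error_code(Code::InvalidDocumentFilter),
    _ => Self::Milli { error: err, index_uid },
}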

impl ErrorCode for Error {
@@ -236,7 +253,7 @@ impl ErrorCode for Error {
// TODO: not sure of the Code to use
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::Dump(e) => e.error_code(),
Error::Milli(e) => e.error_code(),
Error::Milli { error, .. } => error.error_code(),
Error::ProcessBatchPanicked => Code::Internal,
Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(),

@@ -1,16 +1,17 @@
use std::collections::BTreeMap;
use std::env::VarError;
use std::path::Path;
use std::str::FromStr;
use std::time::Duration;

use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
use meilisearch_types::milli::Index;
use meilisearch_types::milli::{Index, Result};
use time::OffsetDateTime;
use uuid::Uuid;

use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing};
use crate::clamp_to_page_size;
use crate::lru::{InsertionOutcome, LruMap};
use crate::{clamp_to_page_size, Result};

/// Keep an internally consistent view of the open indexes in memory.
///
/// This view is made of an LRU cache that will evict the least frequently used indexes when new indexes are opened.
@@ -303,7 +304,15 @@ fn create_or_open_index(
) -> Result<Index> {
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);

let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") {
Ok(value) => u32::from_str(&value).unwrap(),
Err(VarError::NotPresent) => 1024,
Err(VarError::NotUnicode(value)) => panic!(
"Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}"
),
};
options.max_readers(max_readers);
if enable_mdb_writemap {
unsafe { options.flags(EnvFlags::WRITE_MAP) };
}
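
Note that `u32::from_str(&value).unwrap()` above panics at startup on a non-numeric value. If a forgiving fallback were ever preferred, it could look like the following sketch (an illustration, not the committed behavior):

// Hypothetical lenient parse: any unreadable value silently falls back to the default.
let max_readers = std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS")
    .ok()
    .and_then(|value| value.parse::<u32>().ok())
    .unwrap_or(1024);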

@@ -5,6 +5,7 @@ use std::{fs, thread};

use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
@@ -121,7 +122,7 @@ impl IndexStats {
/// # Parameters
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
Ok(IndexStats {
number_of_documents: index.number_of_documents(rtxn)?,
database_size: index.on_disk_size()?,
@@ -183,13 +184,18 @@ impl IndexMapper {
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
// This is very unlikely to happen in practice.
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
let index = self.index_map.write().unwrap().create(
&uuid,
&index_path,
date,
self.enable_mdb_writemap,
self.index_base_map_size,
)?;
let index = self
.index_map
.write()
.unwrap()
.create(
&uuid,
&index_path,
date,
self.enable_mdb_writemap,
self.index_base_map_size,
)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;

wtxn.commit()?;

@@ -357,7 +363,9 @@ impl IndexMapper {
};
let index_path = self.base_path.join(uuid.to_string());
// take the lock to reopen the environment.
reopen.reopen(&mut self.index_map.write().unwrap(), &index_path)?;
reopen
.reopen(&mut self.index_map.write().unwrap(), &index_path)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
continue;
}
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
@@ -372,13 +380,15 @@ impl IndexMapper {
Missing => {
let index_path = self.base_path.join(uuid.to_string());

break index_map.create(
&uuid,
&index_path,
None,
self.enable_mdb_writemap,
self.index_base_map_size,
)?;
break index_map
.create(
&uuid,
&index_path,
None,
self.enable_mdb_writemap,
self.index_base_map_size,
)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
}
Available(index) => break index,
Closing(_) => {
@@ -460,6 +470,7 @@ impl IndexMapper {
let index = self.index(rtxn, index_uid)?;
let index_rtxn = index.read_txn()?;
IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))
}
}
}

@@ -353,7 +353,7 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec

pub fn snapshot_batch(batch: &Batch) -> String {
let mut snap = String::new();
let Batch { uid, details, stats, started_at, finished_at } = batch;
let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch;
if let Some(finished_at) = finished_at {
assert!(finished_at > started_at);
}

@@ -26,6 +26,7 @@ mod index_mapper;
#[cfg(test)]
mod insta_snapshot;
mod lru;
mod processing;
mod utils;
pub mod uuid_codec;

@@ -54,14 +55,13 @@ use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFea
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::new::indexer::document_changes::Progress;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use processing::ProcessingTasks;
use rayon::current_num_threads;
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap;
@@ -72,7 +72,8 @@ use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map
use uuid::Uuid;

use crate::index_mapper::IndexMapper;
use crate::utils::{check_index_swap_validity, clamp_to_page_size, ProcessingBatch};
use crate::processing::{AtomicTaskStep, BatchProgress};
use crate::utils::{check_index_swap_validity, clamp_to_page_size};

pub(crate) type BEI128 = I128<BE>;

@@ -163,48 +164,6 @@ impl Query {
}
}

#[derive(Debug, Clone)]
pub struct ProcessingTasks {
batch: Option<ProcessingBatch>,
/// The list of task ids that are currently running.
processing: RoaringBitmap,
/// The progress on processing tasks
progress: Option<TaskProgress>,
}

impl ProcessingTasks {
/// Creates an empty `ProcessingTasks` struct.
fn new() -> ProcessingTasks {
ProcessingTasks { batch: None, processing: RoaringBitmap::new(), progress: None }
}

/// Stores the currently processing tasks, and the date and time at which processing started.
fn start_processing(&mut self, processing_batch: ProcessingBatch, processing: RoaringBitmap) {
self.batch = Some(processing_batch);
self.processing = processing;
}

fn update_progress(&mut self, progress: Progress) -> TaskProgress {
self.progress.get_or_insert_with(TaskProgress::default).update(progress)
}

/// Set the processing tasks to an empty list.
fn stop_processing(&mut self) -> Self {
self.progress = None;

Self {
batch: std::mem::take(&mut self.batch),
processing: std::mem::take(&mut self.processing),
progress: None,
}
}

/// Returns `true` if at least one currently processing task must be stopped.
fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
!self.processing.is_disjoint(canceled_tasks)
}
}

#[derive(Default, Clone, Debug)]
struct MustStopProcessing(Arc<AtomicBool>);

@@ -407,7 +366,7 @@ pub struct IndexScheduler {
///
/// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation.
#[cfg(test)]
test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>,

/// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler.
///
@@ -476,7 +435,7 @@ impl IndexScheduler {
/// Create an index scheduler and start its run loop.
pub fn new(
options: IndexSchedulerOptions,
#[cfg(test)] test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>,
#[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>,
) -> Result<Self> {
std::fs::create_dir_all(&options.tasks_path)?;
@@ -813,7 +772,7 @@ impl IndexScheduler {
let mut batch_tasks = RoaringBitmap::new();
for batch_uid in batch_uids {
if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
batch_tasks |= &processing_tasks;
batch_tasks |= &*processing_tasks;
} else {
batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
}
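
The change from `&processing_tasks` to `&*processing_tasks` here (and in the hunks below) follows from the new processing module further down in this diff, where `ProcessingTasks::processing` becomes an `Arc<RoaringBitmap>`: the extra deref turns the `Arc` into the plain `&RoaringBitmap` that the bitmap operators expect.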
@@ -827,13 +786,13 @@ impl IndexScheduler {
match status {
// special case for Processing tasks
Status::Processing => {
status_tasks |= &processing_tasks;
status_tasks |= &*processing_tasks;
}
status => status_tasks |= &self.get_status(rtxn, *status)?,
};
}
if !status.contains(&Status::Processing) {
tasks -= &processing_tasks;
tasks -= &*processing_tasks;
}
tasks &= status_tasks;
}
@@ -882,7 +841,7 @@ impl IndexScheduler {
// Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
tasks = {
let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
(&tasks - &processing_tasks, &tasks & &processing_tasks);
(&tasks - &*processing_tasks, &tasks & &*processing_tasks);

// special case for Processing tasks
// A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
@@ -1090,7 +1049,7 @@ impl IndexScheduler {
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
batches = {
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
(&batches - &processing.processing, &batches & &processing.processing);
(&batches - &*processing.processing, &batches & &*processing.processing);

// special case for Processing batches
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
@@ -1440,7 +1399,7 @@ impl IndexScheduler {

// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40
{
return Err(Error::NoSpaceLeftInTaskQueue);
}
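
For context on the check above: `non_free_pages_size()? * 100 / map_size` is the percentage of the LMDB task database already in use, so with the new constant any non-deletion task is refused once more than 40% of the map is occupied.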
@@ -1606,7 +1565,8 @@ impl IndexScheduler {

// We reset the must_stop flag to be sure that we don't stop processing tasks
self.must_stop_processing.reset();
self.processing_tasks
let progress = self
.processing_tasks
.write()
.unwrap()
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
@@ -1619,11 +1579,12 @@ impl IndexScheduler {
let res = {
let cloned_index_scheduler = self.private_clone();
let processing_batch = &mut processing_batch;
let progress = progress.clone();
std::thread::scope(|s| {
let handle = std::thread::Builder::new()
.name(String::from("batch-operation"))
.spawn_scoped(s, move || {
cloned_index_scheduler.process_batch(batch, processing_batch)
cloned_index_scheduler.process_batch(batch, processing_batch, progress)
})
.unwrap();
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
@@ -1636,6 +1597,7 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;

progress.update_progress(BatchProgress::WritingTasksToDisk);
processing_batch.finished();
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new();
@@ -1645,12 +1607,15 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(Breakpoint::ProcessBatchSucceeded);

let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
progress.update_progress(task_progress_obj);
let mut success = 0;
let mut failure = 0;
let mut canceled_by = None;

#[allow(unused_variables)]
for (i, mut task) in tasks.into_iter().enumerate() {
task_progress.fetch_add(1, Ordering::Relaxed);
processing_batch.update(&mut task);
if task.status == Status::Canceled {
canceled.insert(task.uid);
@@ -1678,9 +1643,10 @@ impl IndexScheduler {
tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks.");
}
// If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli(milli::Error::InternalError(
milli::InternalError::AbortedIndexation,
)))
Err(Error::Milli {
error: milli::Error::InternalError(milli::InternalError::AbortedIndexation),
..
})
| Err(Error::AbortedTask) => {
#[cfg(test)]
self.breakpoint(Breakpoint::AbortedIndexation);
@@ -1699,9 +1665,10 @@ impl IndexScheduler {
// 2. close the associated environment
// 3. resize it
// 4. re-schedule tasks
Err(Error::Milli(milli::Error::UserError(
milli::UserError::MaxDatabaseSizeReached,
))) if index_uid.is_some() => {
Err(Error::Milli {
error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached),
..
}) if index_uid.is_some() => {
// fixme: add index_uid to match to avoid the unwrap
let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen
@@ -1716,8 +1683,12 @@ impl IndexScheduler {
Err(err) => {
#[cfg(test)]
self.breakpoint(Breakpoint::ProcessBatchFailed);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
progress.update_progress(task_progress_obj);

let error: ResponseError = err.into();
for id in ids.iter() {
task_progress.fetch_add(1, Ordering::Relaxed);
let mut task = self
.get_task(&wtxn, id)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?
@@ -1738,11 +1709,8 @@ impl IndexScheduler {
}
}

self.processing_tasks.write().unwrap().stop_processing();
// We must re-add the canceled tasks so they're part of the same batch.
// processed.processing |= canceled;
ids |= canceled;

self.write_batch(&mut wtxn, processing_batch, &ids)?;

#[cfg(test)]
@@ -1750,6 +1718,10 @@ impl IndexScheduler {

wtxn.commit().map_err(Error::HeedTransaction)?;

// We should stop processing AFTER everything is processed and written to disk; otherwise, a batch (which only lives in RAM) may appear in the processing tasks
// and then become « not found » for some time, until everything is written and the final commit is made.
self.processing_tasks.write().unwrap().stop_processing();

// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
tracing::debug!("Deleting the update files");

@@ -1942,6 +1914,7 @@ impl IndexScheduler {
// TODO: consider using a type alias or a struct embedder/template
pub fn embedders(
&self,
index_uid: String,
embedding_configs: Vec<IndexEmbeddingConfig>,
) -> Result<EmbeddingConfigs> {
let res: Result<_> = embedding_configs
@@ -1952,8 +1925,12 @@ impl IndexScheduler {
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
..
}| {
let prompt =
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
let prompt = Arc::new(
prompt
.try_into()
.map_err(meilisearch_types::milli::Error::from)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
// optimistically return existing embedder
{
let embedders = self.embedders.read().unwrap();
@@ -1969,7 +1946,9 @@ impl IndexScheduler {
let embedder = Arc::new(
Embedder::new(embedder_options.clone())
.map_err(meilisearch_types::milli::vector::Error::from)
.map_err(meilisearch_types::milli::Error::from)?,
.map_err(|err| {
Error::from_milli(err.into(), Some(index_uid.clone()))
})?,
);
{
let mut embedders = self.embedders.write().unwrap();
@@ -2037,14 +2016,21 @@ impl<'a> Dump<'a> {
task: TaskDump,
content_file: Option<Box<UpdateFile>>,
) -> Result<Task> {
let task_has_no_docs = matches!(task.kind, KindDump::DocumentImport { documents_count, .. } if documents_count == 0);

let content_uuid = match content_file {
Some(content_file) if task.status == Status::Enqueued => {
let (uuid, mut file) = self.index_scheduler.create_update_file(false)?;
let mut builder = DocumentsBatchBuilder::new(&mut file);
let (uuid, file) = self.index_scheduler.create_update_file(false)?;
let mut writer = io::BufWriter::new(file);
for doc in content_file {
builder.append_json_object(&doc?)?;
let doc = doc?;
serde_json::to_writer(&mut writer, &doc)
.map_err(|e| {
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
})
.unwrap();
}
builder.into_inner()?;
let file = writer.into_inner().map_err(|e| e.into_error())?;
file.persist()?;

Some(uuid)
@@ -2052,6 +2038,12 @@ impl<'a> Dump<'a> {
// If the task isn't `Enqueued` then just generate a recognisable `Uuid`
// in case we try to open it later.
_ if task.status != Status::Enqueued => Some(Uuid::nil()),
None if task.status == Status::Enqueued && task_has_no_docs => {
let (uuid, file) = self.index_scheduler.create_update_file(false)?;
file.persist()?;

Some(uuid)
}
_ => None,
};
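
Worth noting in the hunk above: dumped documents used to be funneled through milli's `DocumentsBatchBuilder`; they are now serialized directly into the update file with `serde_json` behind an `io::BufWriter`, and a serialization failure is wrapped as a milli `SerdeJson` internal error (and, as written, unwrapped on the spot).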

@@ -2237,7 +2229,7 @@ mod tests {
use std::time::Instant;

use big_s::S;
use crossbeam::channel::RecvTimeoutError;
use crossbeam_channel::RecvTimeoutError;
use file_store::File;
use insta::assert_json_snapshot;
use maplit::btreeset;
@@ -2289,7 +2281,7 @@ mod tests {
configuration: impl Fn(&mut IndexSchedulerOptions),
) -> (Self, IndexSchedulerHandle) {
let tempdir = TempDir::new().unwrap();
let (sender, receiver) = crossbeam::channel::bounded(0);
let (sender, receiver) = crossbeam_channel::bounded(0);

let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() };

@@ -2421,7 +2413,7 @@ mod tests {
pub struct IndexSchedulerHandle {
_tempdir: TempDir,
index_scheduler: IndexScheduler,
test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>,
test_breakpoint_rcv: crossbeam_channel::Receiver<(Breakpoint, bool)>,
last_breakpoint: Breakpoint,
}

@@ -4318,10 +4310,35 @@ mod tests {
let proc = index_scheduler.processing_tasks.read().unwrap().clone();

let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default())
let (mut batches, _) = index_scheduler
.get_batches_from_authorized_indexes(query.clone(), &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[0,]"); // only the processing batch in the first tick
assert_eq!(batches.len(), 1);
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
// Insta cannot snapshot our batches because the batch stats contain an enum as a key: https://github.com/mitsuhiko/insta/issues/689
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
snapshot!(batch, @r#"
{
"uid": 0,
"details": {
"primaryKey": "mouse"
},
"stats": {
"totalNbTasks": 1,
"status": {
"processing": 1
},
"types": {
"indexCreation": 1
},
"indexUids": {
"catto": 1
}
},
"startedAt": "1970-01-01T00:00:00Z",
"finishedAt": null
}
"#);

let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
let (batches, _) = index_scheduler
@@ -6145,7 +6162,7 @@ mod tests {
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
let simple_hf_name = name.clone();

let configs = index_scheduler.embedders(configs).unwrap();
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed =
hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap();

crates/index-scheduler/src/processing.rs (new file, 316 lines)
@@ -0,0 +1,316 @@
use std::borrow::Cow;
use std::sync::Arc;

use enum_iterator::Sequence;
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step};
use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
use roaring::RoaringBitmap;

use crate::utils::ProcessingBatch;

#[derive(Clone)]
pub struct ProcessingTasks {
pub batch: Option<Arc<ProcessingBatch>>,
/// The list of task ids that are currently running.
pub processing: Arc<RoaringBitmap>,
/// The progress on processing tasks
pub progress: Option<Progress>,
}

impl ProcessingTasks {
/// Creates an empty `ProcessingTasks` struct.
pub fn new() -> ProcessingTasks {
ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None }
}

pub fn get_progress_view(&self) -> Option<ProgressView> {
Some(self.progress.as_ref()?.as_progress_view())
}

/// Stores the currently processing tasks, and the date and time at which processing started.
pub fn start_processing(
&mut self,
processing_batch: ProcessingBatch,
processing: RoaringBitmap,
) -> Progress {
self.batch = Some(Arc::new(processing_batch));
self.processing = Arc::new(processing);
let progress = Progress::default();
progress.update_progress(BatchProgress::ProcessingTasks);
self.progress = Some(progress.clone());

progress
}

/// Set the processing tasks to an empty list.
pub fn stop_processing(&mut self) -> Self {
self.progress = None;

Self {
batch: std::mem::take(&mut self.batch),
processing: std::mem::take(&mut self.processing),
progress: None,
}
}

/// Returns `true` if at least one currently processing task must be stopped.
pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
!self.processing.is_disjoint(canceled_tasks)
}
}
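
Keeping the batch and the bitmap behind `Arc`s makes `ProcessingTasks` cheap to clone: readers (such as the test earlier in this diff that calls `processing_tasks.read().unwrap().clone()`) get an immutable snapshot without copying the bitmap, and the `std::mem::take` calls in `stop_processing` merely swap pointers.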

make_enum_progress! {
pub enum BatchProgress {
ProcessingTasks,
WritingTasksToDisk,
}
}

make_enum_progress! {
pub enum TaskCancelationProgress {
RetrievingTasks,
UpdatingTasks,
}
}

make_enum_progress! {
pub enum TaskDeletionProgress {
DeletingTasksDateTime,
DeletingTasksMetadata,
DeletingTasks,
DeletingBatches,
}
}

make_enum_progress! {
pub enum SnapshotCreationProgress {
StartTheSnapshotCreation,
SnapshotTheIndexScheduler,
SnapshotTheUpdateFiles,
SnapshotTheIndexes,
SnapshotTheApiKeys,
CreateTheTarball,
}
}

make_enum_progress! {
pub enum DumpCreationProgress {
StartTheDumpCreation,
DumpTheApiKeys,
DumpTheTasks,
DumpTheIndexes,
DumpTheExperimentalFeatures,
CompressTheDump,
}
}

make_enum_progress! {
pub enum CreateIndexProgress {
CreatingTheIndex,
}
}

make_enum_progress! {
pub enum UpdateIndexProgress {
UpdatingTheIndex,
}
}

make_enum_progress! {
pub enum DeleteIndexProgress {
DeletingTheIndex,
}
}

make_enum_progress! {
pub enum SwappingTheIndexes {
EnsuringCorrectnessOfTheSwap,
SwappingTheIndexes,
}
}

make_enum_progress! {
pub enum InnerSwappingTwoIndexes {
RetrieveTheTasks,
UpdateTheTasks,
UpdateTheIndexesMetadata,
}
}

make_enum_progress! {
pub enum DocumentOperationProgress {
RetrievingConfig,
ComputingDocumentChanges,
Indexing,
}
}

make_enum_progress! {
pub enum DocumentEditionProgress {
RetrievingConfig,
ComputingDocumentChanges,
Indexing,
}
}

make_enum_progress! {
pub enum DocumentDeletionProgress {
RetrievingConfig,
DeleteDocuments,
Indexing,
}
}

make_enum_progress! {
pub enum SettingsProgress {
RetrievingAndMergingTheSettings,
ApplyTheSettings,
}
}

make_atomic_progress!(Task alias AtomicTaskStep => "task" );
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
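
These macros generate the two-part counters used by the tick loop earlier in this diff; the usage pattern seen there is:

// As used in IndexScheduler::tick above:
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
progress.update_progress(task_progress_obj); // register the sub-step on the progress stack
task_progress.fetch_add(1, Ordering::Relaxed); // bump the counter once per processed task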

pub struct VariableNameStep {
name: String,
current: u32,
total: u32,
}

impl VariableNameStep {
pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
Self { name: name.into(), current, total }
}
}

impl Step for VariableNameStep {
fn name(&self) -> Cow<'static, str> {
self.name.clone().into()
}

fn current(&self) -> u32 {
self.current
}

fn total(&self) -> u32 {
self.total
}
}
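
Unlike the enum-based steps, `VariableNameStep` carries a runtime name, which suits loops over values only known at runtime (index uids, for instance). A hypothetical call, assuming a `Progress` handle named `progress` is in scope and that `update_progress` accepts any `Step`:

// Hypothetical: report per-index progress by name.
progress.update_progress(VariableNameStep::new(format!("index `{uid}`"), i as u32, total));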

#[cfg(test)]
mod test {
use std::sync::atomic::Ordering;

use meili_snap::{json_string, snapshot};

use super::*;

#[test]
fn one_level() {
let mut processing = ProcessingTasks::new();
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "processing tasks",
"finished": 0,
"total": 2
}
],
"percentage": 0.0
}
"#);
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "writing tasks to disk",
"finished": 1,
"total": 2
}
],
"percentage": 50.0
}
"#);
}

#[test]
fn task_progress() {
let mut processing = ProcessingTasks::new();
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
let (atomic, tasks) = AtomicTaskStep::new(10);
processing.progress.as_ref().unwrap().update_progress(tasks);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "processing tasks",
"finished": 0,
"total": 2
},
{
"currentStep": "task",
"finished": 0,
"total": 10
}
],
"percentage": 0.0
}
"#);
atomic.fetch_add(6, Ordering::Relaxed);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "processing tasks",
"finished": 0,
"total": 2
},
{
"currentStep": "task",
"finished": 6,
"total": 10
}
],
"percentage": 30.000002
}
"#);
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "writing tasks to disk",
"finished": 1,
"total": 2
}
],
"percentage": 50.0
}
"#);
let (atomic, tasks) = AtomicTaskStep::new(5);
processing.progress.as_ref().unwrap().update_progress(tasks);
atomic.fetch_add(4, Ordering::Relaxed);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "writing tasks to disk",
"finished": 1,
"total": 2
},
{
"currentStep": "task",
"finished": 4,
"total": 5
}
],
"percentage": 90.0
}
"#);
}
}
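
The percentages in these snapshots follow from the nesting rule: each level scales its child by one over its own total, so with the batch step at 0/2 and the task step at 6/10 the view reports 100 × (0/2 + (1/2) × (6/10)) = 30%, the trailing 30.000002 being f32 rounding; likewise 1/2 + (1/2) × (4/5) yields the 90.0 above.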

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"dumpCreation":1},"indexUids":{}}, }
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"dumpCreation":1},"indexUids":{}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

@@ -9,8 +9,8 @@ source: crates/index-scheduler/src/lib.rs
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
5 {uid: 5, batch_uid: 2, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
----------------------------------------------------------------------
### Status:

@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
|
||||
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
|
||||
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(1):
|
||||
[1,]
|
||||
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"doggo":2}}, }
|
||||
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[3,]
|
||||
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"taskDeletion":1},"indexUids":{}}, }
|
||||
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"taskDeletion":1},"indexUids":{}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
|
||||
@@ -67,7 +67,7 @@ impl ProcessingBatch {
|
||||
task.batch_uid = Some(self.uid);
|
||||
// We don't store the statuses in the map since they're all enqueued but we must
|
||||
// still store them in the stats since that can be displayed.
|
||||
*self.stats.status.entry(task.status).or_default() += 1;
|
||||
*self.stats.status.entry(Status::Processing).or_default() += 1;
|
||||
|
||||
self.kinds.insert(task.kind.as_kind());
|
||||
*self.stats.types.entry(task.kind.as_kind()).or_default() += 1;
|
||||
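The hunk above changes how a processing batch counts task statuses: instead of recording each task's own status (always `enqueued` at the moment the batch picks it up), every picked-up task is now counted under `Status::Processing`, which is what produces the `"status":{"processing":N}` entries in the snapshots above. A minimal sketch of the counting pattern, using simplified stand-in types rather than the real scheduler structs:

```rust
use std::collections::BTreeMap;

#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
enum Status { Enqueued, Processing, Succeeded, Failed }

#[derive(Default, Debug)]
struct BatchStats {
    total_nb_tasks: u64,
    status: BTreeMap<Status, u64>,
}

impl BatchStats {
    // Count a task that the batch just picked up. The task itself is still
    // `Enqueued` in the queue, but for display purposes the batch reports it
    // as `Processing`, which is the switch made in the diff above.
    fn record_picked_up_task(&mut self) {
        self.total_nb_tasks += 1;
        *self.status.entry(Status::Processing).or_default() += 1;
    }
}

fn main() {
    let mut stats = BatchStats::default();
    stats.record_picked_up_task();
    stats.record_picked_up_task();
    // -> {Processing: 2}, matching snapshot lines like {"status":{"processing":2}}
    println!("{stats:?}");
}
```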
@@ -106,7 +106,7 @@ impl ProcessingBatch {
self.stats.total_nb_tasks = 0;
}

/// Update the timestamp of the tasks and the inner structure of this sturcture.
/// Update the timestamp of the tasks and the inner structure of this structure.
pub fn update(&mut self, task: &mut Task) {
// We must re-set this value in case we're dealing with a task that has been added between
// the `processing` and `finished` state
@@ -134,6 +134,7 @@ impl ProcessingBatch {
pub fn to_batch(&self) -> Batch {
Batch {
uid: self.uid,
progress: None,
details: self.details.clone(),
stats: self.stats.clone(),
started_at: self.started_at,
@@ -187,6 +188,7 @@ impl IndexScheduler {
&batch.uid,
&Batch {
uid: batch.uid,
progress: None,
details: batch.details,
stats: batch.stats,
started_at: batch.started_at,
@@ -273,7 +275,9 @@ impl IndexScheduler {
.into_iter()
.map(|batch_id| {
if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
Ok(processing.batch.as_ref().unwrap().to_batch())
let mut batch = processing.batch.as_ref().unwrap().to_batch();
batch.progress = processing.get_progress_view();
Ok(batch)
} else {
self.get_batch(rtxn, batch_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
@@ -287,7 +291,10 @@ impl IndexScheduler {

debug_assert!(old_task != *task);
debug_assert_eq!(old_task.uid, task.uid);
debug_assert!(old_task.batch_uid.is_none() && task.batch_uid.is_some());
debug_assert!(
old_task.batch_uid.is_none() && task.batch_uid.is_some(),
"\n==> old: {old_task:?}\n==> new: {task:?}"
);

if old_task.status != task.status {
self.update_status(wtxn, old_task.status, |bitmap| {

@@ -17,7 +17,7 @@ hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.6", features = ["serde"] }
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
sha2 = "0.10.8"

@@ -24,8 +24,9 @@ flate2 = "1.0.30"
fst = "0.4.7"
memmap2 = "0.9.4"
milli = { path = "../milli" }
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
roaring = { version = "0.10.6", features = ["serde"] }
bumparaw-collections = "0.1.2"
roaring = { version = "0.10.7", features = ["serde"] }
rustc-hash = "2.1.0"
serde = { version = "1.0.204", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.120"

@@ -1,16 +1,16 @@
use milli::progress::ProgressView;
use serde::Serialize;
use time::{Duration, OffsetDateTime};

use crate::{
batches::{Batch, BatchId, BatchStats},
task_view::DetailsView,
tasks::serialize_duration,
};
use crate::batches::{Batch, BatchId, BatchStats};
use crate::task_view::DetailsView;
use crate::tasks::serialize_duration;

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct BatchView {
pub uid: BatchId,
pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStats,
#[serde(serialize_with = "serialize_duration", default)]
@@ -25,6 +25,7 @@ impl BatchView {
pub fn from_batch(batch: &Batch) -> Self {
Self {
uid: batch.uid,
progress: batch.progress.clone(),
details: batch.details.clone(),
stats: batch.stats.clone(),
duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),

@@ -1,12 +1,11 @@
use std::collections::BTreeMap;

use milli::progress::ProgressView;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;

use crate::{
task_view::DetailsView,
tasks::{Kind, Status},
};
use crate::task_view::DetailsView;
use crate::tasks::{Kind, Status};

pub type BatchId = u32;

@@ -15,6 +14,8 @@ pub type BatchId = u32;
pub struct Batch {
pub uid: BatchId,

#[serde(skip)]
pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStats,

@@ -4,10 +4,11 @@ use std::io::{self, BufWriter};
use std::marker::PhantomData;

use bumpalo::Bump;
use bumparaw_collections::RawMap;
use memmap2::Mmap;
use milli::documents::Error;
use milli::Object;
use raw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use serde::de::{SeqAccess, Visitor};
use serde::{Deserialize, Deserializer};
use serde_json::error::Category;
@@ -214,13 +215,13 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserialize into a RawMap that
// does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut doc_alloc = Bump::with_capacity(1024 * 1024 * 1024); // 1MiB
let mut doc_alloc = Bump::with_capacity(1024 * 1024); // 1MiB

let mut out = BufWriter::new(output);
let mut deserializer = serde_json::Deserializer::from_slice(&input);
let res = array_each(&mut deserializer, |obj: &RawValue| {
doc_alloc.reset();
let map = RawMap::from_raw_value(obj, &doc_alloc)?;
let map = RawMap::from_raw_value_and_hasher(obj, FxBuildHasher, &doc_alloc)?;
to_writer(&mut out, &map)
});
let count = match res {
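The capacity fix in the hunk above is easy to miss: the old code asked `bumpalo` for `1024 * 1024 * 1024` bytes (1 GiB) while its own comment claimed 1 MiB; the new code allocates the intended `1024 * 1024`. The per-document pattern — one arena, `reset()` between documents so the allocation is reused — looks roughly like this sketch (illustrative only, not the real `read_json`):

```rust
use bumpalo::Bump;

fn process_documents(docs: &[&str]) {
    // 1024 * 1024 = 1 MiB up front; reset() keeps the backing allocation
    // around so later documents reuse it instead of re-allocating.
    let mut arena = Bump::with_capacity(1024 * 1024);
    for doc in docs {
        arena.reset();
        // Allocate per-document scratch data inside the arena.
        let scratch: &mut String = arena.alloc(String::from(*doc));
        scratch.make_ascii_uppercase();
        println!("{scratch}");
    }
}

fn main() {
    process_documents(&["first", "second"]);
}
```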
@@ -250,26 +251,25 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
}
}

/// Reads NDJSON from file and write it in NDJSON in a file checking it along the way.
pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> {
/// Reads NDJSON from file and checks it.
pub fn read_ndjson(input: &File) -> Result<u64> {
// We memory map to be able to deserialize into a RawMap that
// does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut output = BufWriter::new(output);

let mut bump = Bump::with_capacity(1024 * 1024);

let mut count = 0;
for result in serde_json::Deserializer::from_slice(&input).into_iter() {
bump.reset();
count += 1;
result
.and_then(|raw: &RawValue| {
match result {
Ok(raw) => {
// try to deserialize as a map
let map = RawMap::from_raw_value(raw, &bump)?;
to_writer(&mut output, &map)
})
.map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?;
RawMap::from_raw_value_and_hasher(raw, FxBuildHasher, &bump)
.map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?;
count += 1;
}
Err(e) => return Err(DocumentFormatError::from((PayloadType::Ndjson, e))),
}
}

Ok(count)
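After this change `read_ndjson` only validates and counts: the payload was already streamed into the update file by the caller, so there is nothing left to copy. The core loop — stream a memory-mapped byte slice as one JSON value per line and fail fast on the first malformed one — can be reduced to the following sketch (plain `serde_json`, without the `RawMap` materialization step):

```rust
use serde_json::value::RawValue;

// Count newline-delimited JSON documents, returning an error for the first
// malformed entry, in the spirit of the reworked `read_ndjson` above.
fn count_ndjson(bytes: &[u8]) -> Result<u64, serde_json::Error> {
    let mut count = 0;
    for result in serde_json::Deserializer::from_slice(bytes).into_iter::<&RawValue>() {
        result?; // stop at the first invalid document
        count += 1;
    }
    Ok(count)
}

fn main() {
    let payload = b"{\"id\":1}\n{\"id\":2}\n";
    assert_eq!(count_ndjson(payload).unwrap(), 2);
}
```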
@@ -279,6 +279,7 @@ InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
FacetSearchDisabled , InvalidRequest , BAD_REQUEST ;
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
@@ -549,7 +550,7 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
"the value of `id` is invalid. \
A document identifier can be of type integer or string, \
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
and can not be more than 512 bytes."
and can not be more than 511 bytes."
)
}
}
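The second hunk only corrects the documented limit in the error message, from 512 to 511 bytes. The rule the message describes is simple to express; here is a hedged sketch of such a check (an illustration of the documented constraint, not Meilisearch's actual validator):

```rust
// Check a document identifier against the rule quoted in the error message:
// alphanumeric characters, hyphens, underscores, at most 511 bytes.
fn is_valid_document_id(id: &str) -> bool {
    !id.is_empty()
        && id.len() <= 511
        && id.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_')
}

fn main() {
    assert!(is_valid_document_id("doc_42-a"));
    assert!(!is_valid_document_id("no spaces allowed"));
}
```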
@@ -4,7 +4,6 @@ use std::fmt::{Display, Write};
use std::str::FromStr;

use enum_iterator::Sequence;
use milli::update::new::indexer::document_changes::Progress;
use milli::update::IndexDocumentsMethod;
use milli::Object;
use roaring::RoaringBitmap;
@@ -41,62 +40,6 @@ pub struct Task {
pub kind: KindWithContent,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskProgress {
pub current_step: &'static str,
pub finished_steps: u16,
pub total_steps: u16,
pub finished_substeps: Option<u32>,
pub total_substeps: Option<u32>,
}

impl Default for TaskProgress {
fn default() -> Self {
Self::new()
}
}

impl TaskProgress {
pub fn new() -> Self {
Self {
current_step: "start",
finished_steps: 0,
total_steps: 1,
finished_substeps: None,
total_substeps: None,
}
}

pub fn update(&mut self, progress: Progress) -> TaskProgress {
if self.finished_steps > progress.finished_steps {
return *self;
}

if self.current_step != progress.step_name {
self.current_step = progress.step_name
}

self.total_steps = progress.total_steps;

if self.finished_steps < progress.finished_steps {
self.finished_substeps = None;
self.total_substeps = None;
}
self.finished_steps = progress.finished_steps;
if let Some((finished_substeps, total_substeps)) = progress.finished_total_substep {
if let Some(task_finished_substeps) = self.finished_substeps {
if task_finished_substeps > finished_substeps {
return *self;
}
}
self.finished_substeps = Some(finished_substeps);
self.total_substeps = Some(total_substeps);
}
*self
}
}

impl Task {
pub fn index_uid(&self) -> Option<&str> {
use KindWithContent::*;

@@ -103,7 +103,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.11"
build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.2"
roaring = "0.10.7"
mopa-maintained = "0.2.3"

[dev-dependencies]
@@ -157,5 +157,5 @@ german = ["meilisearch-types/german"]
turkish = ["meilisearch-types/turkish"]

[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.15/build.zip"
sha1 = "d057600b4a839a2e0c0be7a372cd1b2683f3ca7e"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.16/build.zip"
sha1 = "68f83438a114aabbe76bc9fe480071e741996662"

@@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
use meilisearch_types::milli;
use meilisearch_types::milli::OrderBy;
use serde_json::Value;
use tokio::task::JoinError;
@@ -62,8 +63,11 @@ pub enum MeilisearchHttpError {
HeedError(#[from] meilisearch_types::heed::Error),
#[error(transparent)]
IndexScheduler(#[from] index_scheduler::Error),
#[error(transparent)]
Milli(#[from] meilisearch_types::milli::Error),
#[error("{}", match .index_name {
Some(name) if !name.is_empty() => format!("Index `{}`: {error}", name),
_ => format!("{error}")
})]
Milli { error: milli::Error, index_name: Option<String> },
#[error(transparent)]
Payload(#[from] PayloadError),
#[error(transparent)]
@@ -76,6 +80,12 @@ pub enum MeilisearchHttpError {
MissingSearchHybrid,
}

impl MeilisearchHttpError {
pub(crate) fn from_milli(error: milli::Error, index_name: Option<String>) -> Self {
Self::Milli { error, index_name }
}
}

impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
@@ -95,7 +105,7 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
MeilisearchHttpError::HeedError(_) => Code::Internal,
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
MeilisearchHttpError::Milli(e) => e.error_code(),
MeilisearchHttpError::Milli { error, .. } => error.error_code(),
MeilisearchHttpError::Payload(e) => e.error_code(),
MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
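This is the change that produces the `Index `doggos`: ...` prefixes visible in the snapshot diffs at the top of this comparison: the `Milli` variant now carries the index name and prepends it when non-empty. The formatting rule can be shown in isolation with a plain `Display` impl (simplified stand-in types, not the real error enum):

```rust
use std::fmt;

// Simplified stand-in for the reworked `Milli` variant: an error message
// plus the index it came from, if known.
struct IndexedError {
    error: String,
    index_name: Option<String>,
}

impl fmt::Display for IndexedError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.index_name {
            // Only prefix when an index name is present and non-empty.
            Some(name) if !name.is_empty() => write!(f, "Index `{}`: {}", name, self.error),
            _ => write!(f, "{}", self.error),
        }
    }
}

fn main() {
    let e = IndexedError {
        error: "Attribute `id` is not filterable.".into(),
        index_name: Some("doggos".into()),
    };
    assert_eq!(e.to_string(), "Index `doggos`: Attribute `id` is not filterable.");
}
```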
@@ -395,6 +395,7 @@ fn import_dump(
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
let uid = metadata.uid.clone();
tracing::info!("Importing index `{}`.", metadata.uid);

let date = Some((metadata.created_at, metadata.updated_at));
@@ -432,7 +433,7 @@ fn import_dump(
let reader = DocumentsBatchReader::from_reader(reader)?;

let embedder_configs = index.embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let embedders = index_scheduler.embedders(uid, embedder_configs)?;

let builder = milli::update::IndexDocuments::new(
&mut wtxn,

@@ -20,14 +20,14 @@ use meilisearch::{
LogStderrType, Opt, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;

#[cfg(not(windows))]
#[global_allocator]
static ALLOC: MiMalloc = MiMalloc;
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn default_log_route_layer() -> LogRouteType {
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
@@ -129,6 +129,11 @@ async fn try_main() -> anyhow::Result<()> {

print_launch_resume(&opt, analytics.clone(), config_read_from);

tokio::spawn(async move {
tokio::signal::ctrl_c().await.unwrap();
std::process::exit(130);
});

run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?;

Ok(())

@@ -1,18 +1,18 @@
use actix_web::{
web::{self, Data},
HttpResponse,
};
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use deserr::actix_web::AwebQueryParameter;
use index_scheduler::{IndexScheduler, Query};
use meilisearch_types::{
batch_view::BatchView, batches::BatchId, deserr::DeserrQueryParamError, error::ResponseError,
keys::actions,
};
use meilisearch_types::batch_view::BatchView;
use meilisearch_types::batches::BatchId;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;

use crate::extractors::{authentication::GuardedData, sequential_extractor::SeqHandler};

use super::{tasks::TasksFilterQuery, ActionPolicy};
use super::tasks::TasksFilterQuery;
use super::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;

pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::get().to(SeqHandler(get_batches))))

@@ -1,5 +1,5 @@
use std::collections::HashSet;
use std::io::ErrorKind;
use std::io::{ErrorKind, Seek as _};
use std::marker::PhantomData;

use actix_web::http::header::CONTENT_TYPE;
@@ -572,7 +572,7 @@ async fn document_addition(
index_uid: IndexUid,
primary_key: Option<String>,
csv_delimiter: Option<u8>,
mut body: Payload,
body: Payload,
method: IndexDocumentsMethod,
task_id: Option<TaskId>,
dry_run: bool,
@@ -609,54 +609,60 @@ async fn document_addition(
};

let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?;
let documents_count = match format {
PayloadType::Ndjson => {
let (path, file) = update_file.into_parts();
let file = match file {
Some(file) => {
let (file, path) = file.into_parts();
let mut file = copy_body_to_file(file, body, format).await?;
file.rewind().map_err(|e| {
index_scheduler::Error::FileStore(file_store::Error::IoError(e))
})?;
Some(tempfile::NamedTempFile::from_parts(file, path))
}
None => None,
};

let temp_file = match tempfile() {
Ok(file) => file,
Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = file.as_ref().map_or(Ok(0), |ntf| {
read_ndjson(ntf.as_file()).map_err(MeilisearchHttpError::DocumentFormat)
})?;

let update_file = file_store::File::from_parts(path, file);
update_file.persist()?;

Ok(documents_count)
})
.await?;

Ok(documents_count)
}
PayloadType::Json | PayloadType::Csv { delimiter: _ } => {
let temp_file = match tempfile() {
Ok(file) => file,
Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
};

let read_file = copy_body_to_file(temp_file, body, format).await?;
tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, &mut update_file)?,
PayloadType::Csv { delimiter } => {
read_csv(&read_file, &mut update_file, delimiter)?
}
PayloadType::Ndjson => {
unreachable!("We already wrote the user content into the update file")
}
};
// we NEED to persist the file here because we moved the `udpate_file` in another task.
update_file.persist()?;
Ok(documents_count)
})
.await
}
};

let async_file = File::from_std(temp_file);
let mut buffer = BufWriter::new(async_file);

let mut buffer_write_size: usize = 0;
while let Some(result) = body.next().await {
let byte = result?;

if byte.is_empty() && buffer_write_size == 0 {
return Err(MeilisearchHttpError::MissingPayload(format));
}

match buffer.write_all(&byte).await {
Ok(()) => buffer_write_size += 1,
Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
}
}

if let Err(e) = buffer.flush().await {
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
}

if buffer_write_size == 0 {
return Err(MeilisearchHttpError::MissingPayload(format));
}

if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
}

let read_file = buffer.into_inner().into_std().await;
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, &mut update_file)?,
PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?,
PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?,
};
// we NEED to persist the file here because we moved the `udpate_file` in another task.
update_file.persist()?;
Ok(documents_count)
})
.await;

let documents_count = match documents_count {
Ok(Ok(documents_count)) => documents_count,
// in this case the file has not possibly be persisted.
@@ -703,6 +709,39 @@ async fn document_addition(
Ok(task.into())
}

async fn copy_body_to_file(
output: std::fs::File,
mut body: Payload,
format: PayloadType,
) -> Result<std::fs::File, MeilisearchHttpError> {
let async_file = File::from_std(output);
let mut buffer = BufWriter::new(async_file);
let mut buffer_write_size: usize = 0;
while let Some(result) = body.next().await {
let byte = result?;

if byte.is_empty() && buffer_write_size == 0 {
return Err(MeilisearchHttpError::MissingPayload(format));
}

match buffer.write_all(&byte).await {
Ok(()) => buffer_write_size += 1,
Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
}
}
if let Err(e) = buffer.flush().await {
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
}
if buffer_write_size == 0 {
return Err(MeilisearchHttpError::MissingPayload(format));
}
if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
}
let read_file = buffer.into_inner().into_std().await;
Ok(read_file)
}
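The `copy_body_to_file` helper extracted above drains an incoming byte stream into a `std::fs::File` through Tokio's async buffered writer, then rewinds so a blocking parser can read from the start. The shape of that pattern, detached from the actix `Payload` and error types (a hypothetical stand-alone version over any `futures_util` stream of byte chunks — crate choice and signature are assumptions for illustration):

```rust
use std::io::SeekFrom;

use futures_util::{Stream, StreamExt};
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};

// Drain a stream of byte chunks into a std::fs::File, then rewind it so a
// blocking parser can read it from offset 0 - the same shape as the
// `copy_body_to_file` helper above, minus the actix-specific types.
async fn stream_to_file<S>(output: std::fs::File, mut chunks: S) -> std::io::Result<std::fs::File>
where
    S: Stream<Item = std::io::Result<Vec<u8>>> + Unpin,
{
    let mut buffer = BufWriter::new(File::from_std(output));
    while let Some(chunk) = chunks.next().await {
        buffer.write_all(&chunk?).await?;
    }
    buffer.flush().await?;
    let mut file = buffer.into_inner();
    file.seek(SeekFrom::Start(0)).await?;
    Ok(file.into_std().await)
}
```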
pub async fn delete_documents_batch(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,

@@ -185,7 +185,8 @@ pub async fn search(

let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
let search_kind =
search_kind(&search_query, &index_scheduler, index_uid.to_string(), &index, features)?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(

@@ -5,7 +5,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
use index_scheduler::{Error, IndexScheduler};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@@ -107,7 +107,10 @@ pub async fn list_indexes(
if !filters.is_index_authorized(uid) {
return Ok(None);
}
Ok(Some(IndexView::new(uid.to_string(), index)?))
Ok(Some(
IndexView::new(uid.to_string(), index)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
))
})?;
// Won't cause to open all indexes because IndexView doesn't keep the `Index` opened.
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();

@@ -243,11 +243,19 @@ pub async fn search_with_url_query(
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();

let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
perform_search(
index_uid.to_string(),
&index,
query,
search_kind,
retrieve_vector,
index_scheduler.features(),
)
})
.await;
permit.drop().await;
@@ -287,12 +295,20 @@ pub async fn search_with_post(

let features = index_scheduler.features();

let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;

let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
perform_search(
index_uid.to_string(),
&index,
query,
search_kind,
retrieve_vectors,
index_scheduler.features(),
)
})
.await;
permit.drop().await;
@@ -314,6 +330,7 @@ pub async fn search_with_post(
pub fn search_kind(
query: &SearchQuery,
index_scheduler: &IndexScheduler,
index_uid: String,
index: &milli::Index,
features: RoFeatures,
) -> Result<SearchKind, ResponseError> {
@@ -332,7 +349,7 @@ pub fn search_kind(
(None, _, None) => Ok(SearchKind::KeywordOnly),
// hybrid.semantic_ratio == 1.0 => vector
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
}
// hybrid.semantic_ratio == 0.0 => keyword
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
@@ -340,13 +357,14 @@ pub fn search_kind(
}
// no query, hybrid, vector => semantic
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len()))
}
// query, no hybrid, no vector => keyword
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
// query, hybrid, maybe vector => hybrid
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
index_scheduler,
index_uid,
index,
embedder,
**semantic_ratio,
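The `search_kind` match above encodes the dispatch rule: a hybrid query with a semantic ratio of 1.0 is a pure vector search, 0.0 a pure keyword search, and anything in between a true hybrid. Reduced to the ratio alone, the decision looks like this sketch (simplified enum, not the real `SearchKind`):

```rust
#[derive(Debug, PartialEq)]
enum Kind { Keyword, Semantic, Hybrid { ratio: f32 } }

// Dispatch on the requested semantic ratio, mirroring the match arms above:
// 1.0 => vector-only, 0.0 => keyword-only, otherwise a weighted hybrid.
fn kind_for_ratio(semantic_ratio: Option<f32>) -> Kind {
    match semantic_ratio {
        None => Kind::Keyword,
        Some(r) if r == 1.0 => Kind::Semantic,
        Some(r) if r == 0.0 => Kind::Keyword,
        Some(r) => Kind::Hybrid { ratio: r },
    }
}

fn main() {
    assert_eq!(kind_for_ratio(None), Kind::Keyword);
    assert_eq!(kind_for_ratio(Some(1.0)), Kind::Semantic);
    assert!(matches!(kind_for_ratio(Some(0.5)), Kind::Hybrid { .. }));
}
```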
@@ -17,6 +17,32 @@ use crate::extractors::authentication::GuardedData;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;

/// This macro generates the routes for the settings.
///
/// It takes a list of settings and generates a module for each setting.
/// Each module contains the `get`, `update` and `delete` routes for the setting.
///
/// It also generates a `configure` function that configures the routes for the settings.
macro_rules! make_setting_routes {
($({route: $route:literal, update_verb: $update_verb:ident, value_type: $type:ty, err_type: $err_ty:ty, attr: $attr:ident, camelcase_attr: $camelcase_attr:literal, analytics: $analytics:ident},)*) => {
$(
make_setting_route!($route, $update_verb, $type, $err_ty, $attr, $camelcase_attr, $analytics);
)*

pub fn configure(cfg: &mut web::ServiceConfig) {
use crate::extractors::sequential_extractor::SeqHandler;
cfg.service(
web::resource("")
.route(web::patch().to(SeqHandler(update_all)))
.route(web::get().to(SeqHandler(get_all)))
.route(web::delete().to(SeqHandler(delete_all))))
$(.service($attr::resources()))*;
}

pub const ALL_SETTINGS_NAMES: &[&str] = &[$(stringify!($attr)),*];
};
}

#[macro_export]
macro_rules! make_setting_route {
($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident) => {
@@ -153,279 +179,227 @@ macro_rules! make_setting_route {
};
}

make_setting_route!(
"/filterable-attributes",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes,
>,
filterable_attributes,
"filterableAttributes",
FilterableAttributesAnalytics
);

make_setting_route!(
"/sortable-attributes",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes,
>,
sortable_attributes,
"sortableAttributes",
SortableAttributesAnalytics
);

make_setting_route!(
"/displayed-attributes",
put,
Vec<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes,
>,
displayed_attributes,
"displayedAttributes",
DisplayedAttributesAnalytics
);

make_setting_route!(
"/typo-tolerance",
patch,
meilisearch_types::settings::TypoSettings,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance,
>,
typo_tolerance,
"typoTolerance",
TypoToleranceAnalytics
);

make_setting_route!(
"/searchable-attributes",
put,
Vec<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes,
>,
searchable_attributes,
"searchableAttributes",
SearchableAttributesAnalytics
);

make_setting_route!(
"/stop-words",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsStopWords,
>,
stop_words,
"stopWords",
StopWordsAnalytics
);

make_setting_route!(
"/non-separator-tokens",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
>,
non_separator_tokens,
"nonSeparatorTokens",
NonSeparatorTokensAnalytics
);

make_setting_route!(
"/separator-tokens",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
>,
separator_tokens,
"separatorTokens",
SeparatorTokensAnalytics
);

make_setting_route!(
"/dictionary",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
>,
dictionary,
"dictionary",
DictionaryAnalytics
);

make_setting_route!(
"/synonyms",
put,
std::collections::BTreeMap<String, Vec<String>>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms,
>,
synonyms,
"synonyms",
SynonymsAnalytics
);

make_setting_route!(
"/distinct-attribute",
put,
String,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute,
>,
distinct_attribute,
"distinctAttribute",
DistinctAttributeAnalytics
);

make_setting_route!(
"/proximity-precision",
put,
meilisearch_types::settings::ProximityPrecisionView,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
>,
proximity_precision,
"proximityPrecision",
ProximityPrecisionAnalytics
);

make_setting_route!(
"/localized-attributes",
put,
Vec<meilisearch_types::locales::LocalizedAttributesRuleView>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes,
>,
localized_attributes,
"localizedAttributes",
LocalesAnalytics
);

make_setting_route!(
"/ranking-rules",
put,
Vec<meilisearch_types::settings::RankingRuleView>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules,
>,
ranking_rules,
"rankingRules",
RankingRulesAnalytics
);

make_setting_route!(
"/faceting",
patch,
meilisearch_types::settings::FacetingSettings,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFaceting,
>,
faceting,
"faceting",
FacetingAnalytics
);

make_setting_route!(
"/pagination",
patch,
meilisearch_types::settings::PaginationSettings,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsPagination,
>,
pagination,
"pagination",
PaginationAnalytics
);

make_setting_route!(
"/embedders",
patch,
std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
>,
embedders,
"embedders",
EmbeddersAnalytics
);

make_setting_route!(
"/search-cutoff-ms",
put,
u64,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs,
>,
search_cutoff_ms,
"searchCutoffMs",
SearchCutoffMsAnalytics
);

make_setting_route!(
"/facet-search",
put,
bool,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFacetSearch,
>,
facet_search,
"facetSearch",
FacetSearchAnalytics
);

make_setting_route!(
"/prefix-search",
put,
meilisearch_types::settings::PrefixSearchSettings,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsPrefixSearch,
>,
prefix_search,
"prefixSearch",
PrefixSearchAnalytics
);

macro_rules! generate_configure {
($($mod:ident),*) => {
pub fn configure(cfg: &mut web::ServiceConfig) {
use crate::extractors::sequential_extractor::SeqHandler;
cfg.service(
web::resource("")
.route(web::patch().to(SeqHandler(update_all)))
.route(web::get().to(SeqHandler(get_all)))
.route(web::delete().to(SeqHandler(delete_all))))
$(.service($mod::resources()))*;
}
};
}

generate_configure!(
filterable_attributes,
sortable_attributes,
displayed_attributes,
localized_attributes,
searchable_attributes,
distinct_attribute,
proximity_precision,
stop_words,
separator_tokens,
non_separator_tokens,
dictionary,
synonyms,
ranking_rules,
typo_tolerance,
pagination,
faceting,
embedders,
search_cutoff_ms
make_setting_routes!(
{
route: "/filterable-attributes",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes,
>,
attr: filterable_attributes,
camelcase_attr: "filterableAttributes",
analytics: FilterableAttributesAnalytics
},
{
route: "/sortable-attributes",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes,
>,
attr: sortable_attributes,
camelcase_attr: "sortableAttributes",
analytics: SortableAttributesAnalytics
},
{
route: "/displayed-attributes",
update_verb: put,
value_type: Vec<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes,
>,
attr: displayed_attributes,
camelcase_attr: "displayedAttributes",
analytics: DisplayedAttributesAnalytics
},
{
route: "/typo-tolerance",
update_verb: patch,
value_type: meilisearch_types::settings::TypoSettings,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance,
>,
attr: typo_tolerance,
camelcase_attr: "typoTolerance",
analytics: TypoToleranceAnalytics
},
{
route: "/searchable-attributes",
update_verb: put,
value_type: Vec<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes,
>,
attr: searchable_attributes,
camelcase_attr: "searchableAttributes",
analytics: SearchableAttributesAnalytics
},
{
route: "/stop-words",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsStopWords,
>,
attr: stop_words,
camelcase_attr: "stopWords",
analytics: StopWordsAnalytics
},
{
route: "/non-separator-tokens",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
>,
attr: non_separator_tokens,
camelcase_attr: "nonSeparatorTokens",
analytics: NonSeparatorTokensAnalytics
},
{
route: "/separator-tokens",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
>,
attr: separator_tokens,
camelcase_attr: "separatorTokens",
analytics: SeparatorTokensAnalytics
},
{
route: "/dictionary",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
>,
attr: dictionary,
camelcase_attr: "dictionary",
analytics: DictionaryAnalytics
},
{
route: "/synonyms",
update_verb: put,
value_type: std::collections::BTreeMap<String, Vec<String>>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms,
>,
attr: synonyms,
camelcase_attr: "synonyms",
analytics: SynonymsAnalytics
},
{
route: "/distinct-attribute",
update_verb: put,
value_type: String,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute,
>,
attr: distinct_attribute,
camelcase_attr: "distinctAttribute",
analytics: DistinctAttributeAnalytics
},
{
route: "/proximity-precision",
update_verb: put,
value_type: meilisearch_types::settings::ProximityPrecisionView,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
>,
attr: proximity_precision,
camelcase_attr: "proximityPrecision",
analytics: ProximityPrecisionAnalytics
},
{
route: "/localized-attributes",
update_verb: put,
value_type: Vec<meilisearch_types::locales::LocalizedAttributesRuleView>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes,
>,
attr: localized_attributes,
camelcase_attr: "localizedAttributes",
analytics: LocalesAnalytics
},
{
route: "/ranking-rules",
update_verb: put,
value_type: Vec<meilisearch_types::settings::RankingRuleView>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules,
>,
attr: ranking_rules,
camelcase_attr: "rankingRules",
analytics: RankingRulesAnalytics
},
{
route: "/faceting",
update_verb: patch,
value_type: meilisearch_types::settings::FacetingSettings,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFaceting,
>,
attr: faceting,
camelcase_attr: "faceting",
analytics: FacetingAnalytics
},
{
route: "/pagination",
update_verb: patch,
value_type: meilisearch_types::settings::PaginationSettings,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsPagination,
>,
attr: pagination,
camelcase_attr: "pagination",
analytics: PaginationAnalytics
},
{
route: "/embedders",
update_verb: patch,
value_type: std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
>,
attr: embedders,
camelcase_attr: "embedders",
analytics: EmbeddersAnalytics
},
{
route: "/search-cutoff-ms",
update_verb: put,
value_type: u64,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs,
>,
attr: search_cutoff_ms,
camelcase_attr: "searchCutoffMs",
analytics: SearchCutoffMsAnalytics
},
{
route: "/facet-search",
update_verb: put,
value_type: bool,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFacetSearch,
>,
attr: facet_search,
camelcase_attr: "facetSearch",
analytics: FacetSearchAnalytics
},
{
route: "/prefix-search",
update_verb: put,
value_type: meilisearch_types::settings::PrefixSearchSettings,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsPrefixSearch,
>,
attr: prefix_search,
camelcase_attr: "prefixSearch",
analytics: PrefixSearchAnalytics
},
);
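The refactor above folds the twenty `make_setting_route!` calls and the separate `generate_configure!` list into one declarative `make_setting_routes!` invocation, so a setting can no longer be registered in one place but forgotten in the other. The general technique — a macro that accepts brace-grouped key/value entries and expands each into items plus a derived name list — can be demonstrated stand-alone (every name in this demo is invented; it only mirrors the shape of the real macro):

```rust
// A minimal stand-alone macro in the same spirit as `make_setting_routes!`:
// it takes brace-grouped entries and generates one function per entry plus
// a constant listing all entry names.
macro_rules! make_routes {
    ($({ route: $route:literal, attr: $attr:ident },)*) => {
        $(
            fn $attr() -> &'static str { $route }
        )*
        const ALL_ROUTE_NAMES: &[&str] = &[$(stringify!($attr)),*];
    };
}

make_routes! {
    { route: "/filterable-attributes", attr: filterable_attributes },
    { route: "/sortable-attributes", attr: sortable_attributes },
}

fn main() {
    assert_eq!(filterable_attributes(), "/filterable-attributes");
    assert_eq!(ALL_ROUTE_NAMES, &["filterable_attributes", "sortable_attributes"][..]);
}
```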
pub async fn update_all(
|
||||
|
||||
@@ -103,8 +103,13 @@ async fn similar(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
|
||||
let (embedder_name, embedder, quantized) =
|
||||
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
|
||||
let (embedder_name, embedder, quantized) = SearchKind::embedder(
|
||||
&index_scheduler,
|
||||
index_uid.to_string(),
|
||||
&index,
|
||||
&query.embedder,
|
||||
None,
|
||||
)?;
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
perform_similar(
|
||||
|
||||
@@ -125,14 +125,28 @@ pub async fn multi_search_with_post(
|
||||
})
|
||||
.with_index(query_index)?;
|
||||
|
||||
let search_kind =
|
||||
search_kind(&query, index_scheduler.get_ref(), &index, features)
|
||||
.with_index(query_index)?;
|
||||
let index_uid_str = index_uid.to_string();
|
||||
|
||||
let search_kind = search_kind(
|
||||
&query,
|
||||
index_scheduler.get_ref(),
|
||||
index_uid_str.clone(),
|
||||
&index,
|
||||
features,
|
||||
)
|
||||
.with_index(query_index)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)
|
||||
.with_index(query_index)?;
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vector, features)
|
||||
perform_search(
|
||||
index_uid_str.clone(),
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vector,
|
||||
features,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
||||
@@ -560,7 +560,8 @@ pub fn perform_federated_search(
        // use an immediately invoked lambda to capture the result without returning from the function

        let res: Result<(), ResponseError> = (|| {
-           let search_kind = search_kind(&query, index_scheduler, &index, features)?;
+           let search_kind =
+               search_kind(&query, index_scheduler, index_uid.to_string(), &index, features)?;

            let canonicalization_kind = match (&search_kind, &query.q) {
                (SearchKind::SemanticOnly { .. }, _) => {

@@ -636,7 +637,8 @@ pub fn perform_federated_search(
            search.offset(0);
            search.limit(required_hit_count);

-           let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?;
+           let (result, _semantic_hit_count) =
+               super::search_from_kind(index_uid.to_string(), search_kind, search)?;
            let format = AttributesFormat {
                attributes_to_retrieve: query.attributes_to_retrieve,
                retrieve_vectors,

@@ -670,7 +672,10 @@ pub fn perform_federated_search(

            let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);

-           let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?;
+           let hit_maker =
+               HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| {
+                   MeilisearchHttpError::from_milli(e, Some(index_uid.to_string()))
+               })?;

            results_by_query.push(SearchResultByQuery {
                federation_options,

@@ -19,7 +19,9 @@ use meilisearch_types::locales::Locale;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
- use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
+ use meilisearch_types::milli::{
+     FacetValueHit, InternalError, OrderBy, SearchForFacetValues, TimeBudget,
+ };
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::{Language, TokenizerBuilder};

@@ -281,35 +283,38 @@ pub enum SearchKind
impl SearchKind {
    pub(crate) fn semantic(
        index_scheduler: &index_scheduler::IndexScheduler,
+       index_uid: String,
        index: &Index,
        embedder_name: &str,
        vector_len: Option<usize>,
    ) -> Result<Self, ResponseError> {
        let (embedder_name, embedder, quantized) =
-           Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
+           Self::embedder(index_scheduler, index_uid, index, embedder_name, vector_len)?;
        Ok(Self::SemanticOnly { embedder_name, embedder, quantized })
    }

    pub(crate) fn hybrid(
        index_scheduler: &index_scheduler::IndexScheduler,
+       index_uid: String,
        index: &Index,
        embedder_name: &str,
        semantic_ratio: f32,
        vector_len: Option<usize>,
    ) -> Result<Self, ResponseError> {
        let (embedder_name, embedder, quantized) =
-           Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
+           Self::embedder(index_scheduler, index_uid, index, embedder_name, vector_len)?;
        Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio })
    }

    pub(crate) fn embedder(
        index_scheduler: &index_scheduler::IndexScheduler,
+       index_uid: String,
        index: &Index,
        embedder_name: &str,
        vector_len: Option<usize>,
    ) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
        let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
-       let embedders = index_scheduler.embedders(embedder_configs)?;
+       let embedders = index_scheduler.embedders(index_uid, embedder_configs)?;

        let (embedder, _, quantized) = embedders
            .get(embedder_name)
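For callers, the visible change to `SearchKind` is the extra `index_uid` argument threaded through to `Self::embedder`. A hypothetical call site (the embedder name and semantic ratio below are illustrative placeholders, not values from this diff):

    // Sketch: building a hybrid search kind; "default" and 0.5 stand in for
    // a real embedder name and semantic ratio.
    let search_kind = SearchKind::hybrid(
        index_scheduler.get_ref(),
        index_uid.to_string(),
        &index,
        "default",
        0.5,
        None, // no target vector length known at this point
    )?;
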
@@ -890,6 +895,7 @@ fn prepare_search<'t>(
}

pub fn perform_search(
+   index_uid: String,
    index: &Index,
    query: SearchQuery,
    search_kind: SearchKind,

@@ -916,7 +922,7 @@ pub fn perform_search(
            used_negative_operator,
        },
        semantic_hit_count,
-   ) = search_from_kind(search_kind, search)?;
+   ) = search_from_kind(index_uid, search_kind, search)?;

    let SearchQuery {
        q,

@@ -1069,17 +1075,27 @@ fn compute_facet_distribution_stats<S: AsRef<str>>(
}

pub fn search_from_kind(
+   index_uid: String,
    search_kind: SearchKind,
    search: milli::Search<'_>,
) -> Result<(milli::SearchResult, Option<u32>), MeilisearchHttpError> {
    let (milli_result, semantic_hit_count) = match &search_kind {
-       SearchKind::KeywordOnly => (search.execute()?, None),
+       SearchKind::KeywordOnly => {
+           let results = search
+               .execute()
+               .map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?;
+           (results, None)
+       }
        SearchKind::SemanticOnly { .. } => {
-           let results = search.execute()?;
+           let results = search
+               .execute()
+               .map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?;
            let semantic_hit_count = results.document_scores.len() as u32;
            (results, Some(semantic_hit_count))
        }
-       SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
+       SearchKind::Hybrid { semantic_ratio, .. } => search
+           .execute_hybrid(*semantic_ratio)
+           .map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid)))?,
    };
    Ok((milli_result, semantic_hit_count))
}

@@ -1181,7 +1197,7 @@ impl<'a> HitMaker<'a> {
        rtxn: &'a RoTxn<'a>,
        format: AttributesFormat,
        mut formatter_builder: MatcherBuilder<'a>,
-   ) -> Result<Self, MeilisearchHttpError> {
+   ) -> milli::Result<Self> {
        formatter_builder.crop_marker(format.crop_marker);
        formatter_builder.highlight_prefix(format.highlight_pre_tag);
        formatter_builder.highlight_suffix(format.highlight_post_tag);

@@ -1276,11 +1292,7 @@ impl<'a> HitMaker<'a> {
        })
    }

-   pub fn make_hit(
-       &self,
-       id: u32,
-       score: &[ScoreDetails],
-   ) -> Result<SearchHit, MeilisearchHttpError> {
+   pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
        let (_, obkv) =
            self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;

@@ -1323,7 +1335,10 @@ impl<'a> HitMaker<'a> {
                .is_some_and(|conf| conf.user_provided.contains(id));
            let embeddings =
                ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided };
-           vectors.insert(name, serde_json::to_value(embeddings)?);
+           vectors.insert(
+               name,
+               serde_json::to_value(embeddings).map_err(InternalError::SerdeJson).unwrap(),
+           );
        }
        document.insert("_vectors".into(), vectors.into());
    }

@@ -1369,7 +1384,7 @@ fn make_hits<'a>(
    format: AttributesFormat,
    matching_words: milli::MatchingWords,
    documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
-) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
+) -> milli::Result<Vec<SearchHit>> {
    let mut documents = Vec::new();

    let dictionary = index.dictionary(rtxn)?;

@@ -1407,6 +1422,13 @@ pub fn perform_facet_search(
        None => TimeBudget::default(),
    };

+   if !index.facet_search(&rtxn)? {
+       return Err(ResponseError::from_msg(
+           "The facet search is disabled for this index".to_string(),
+           Code::FacetSearchDisabled,
+       ));
+   }
+
    // In the faceted search context, we want to use the intersection between the locales provided by the user
    // and the locales of the facet string.
    // If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale.
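This guard is what turns facet search on a facet-search-disabled index into an error instead of an empty result; the test changes further down assert exactly this. The observable effect, in the test-harness style used throughout this changeset:

    // With `facetSearch: false` in the index settings, facet search now
    // fails fast instead of returning an empty `facetHits` array.
    let (response, code) =
        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
    assert_eq!(code, 400, "{}", response);
    assert_eq!(response["code"], "facet_search_disabled");
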
@@ -1690,12 +1712,12 @@ fn make_document(
    displayed_attributes: &BTreeSet<FieldId>,
    field_ids_map: &FieldsIdsMap,
    obkv: &obkv::KvReaderU16,
-) -> Result<Document, MeilisearchHttpError> {
+) -> milli::Result<Document> {
    let mut document = serde_json::Map::new();

    // recreate the original json
    for (key, value) in obkv.iter() {
-       let value = serde_json::from_slice(value)?;
+       let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson).unwrap();
        let key = field_ids_map.name(key).expect("Missing field name").to_string();

        document.insert(key, value);

@@ -1720,7 +1742,7 @@ fn format_fields(
    displayable_ids: &BTreeSet<FieldId>,
    locales: Option<&[Language]>,
    localized_attributes: &[LocalizedAttributesRule],
-) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> {
+) -> milli::Result<(Option<MatchesPosition>, Document)> {
    let mut matches_position = compute_matches.then(BTreeMap::new);
    let mut document = document.clone();

@@ -1898,7 +1920,7 @@ fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpEr
        }
    }

-   Ok(Filter::from_array(ands)?)
+   Filter::from_array(ands).map_err(|e| MeilisearchHttpError::from_milli(e, None))
}

#[cfg(test)]

@@ -224,7 +224,7 @@ async fn list_batches_status_and_type_filtered() {
}

#[actix_rt::test]
- async fn get_batch_filter_error() {
+ async fn list_batch_filter_error() {
    let server = Server::new().await;

    let (response, code) = server.batches_filter("lol=pied").await;

@@ -284,6 +284,7 @@ async fn test_summarized_document_addition_or_update() {
    @r#"
    {
      "uid": 0,
+     "progress": null,
      "details": {
        "receivedDocuments": 1,
        "indexedDocuments": 1

@@ -314,6 +315,7 @@ async fn test_summarized_document_addition_or_update() {
    @r#"
    {
      "uid": 1,
+     "progress": null,
      "details": {
        "receivedDocuments": 1,
        "indexedDocuments": 1

@@ -349,6 +351,7 @@ async fn test_summarized_delete_documents_by_batch() {
    @r#"
    {
      "uid": 0,
+     "progress": null,
      "details": {
        "providedIds": 3,
        "deletedDocuments": 0

@@ -380,6 +383,7 @@ async fn test_summarized_delete_documents_by_batch() {
    @r#"
    {
      "uid": 2,
+     "progress": null,
      "details": {
        "providedIds": 1,
        "deletedDocuments": 0

@@ -416,6 +420,7 @@ async fn test_summarized_delete_documents_by_filter() {
    @r#"
    {
      "uid": 0,
+     "progress": null,
      "details": {
        "providedIds": 0,
        "deletedDocuments": 0,

@@ -448,6 +453,7 @@ async fn test_summarized_delete_documents_by_filter() {
    @r#"
    {
      "uid": 2,
+     "progress": null,
      "details": {
        "providedIds": 0,
        "deletedDocuments": 0,

@@ -480,6 +486,7 @@ async fn test_summarized_delete_documents_by_filter() {
    @r#"
    {
      "uid": 4,
+     "progress": null,
      "details": {
        "providedIds": 0,
        "deletedDocuments": 0,

@@ -516,6 +523,7 @@ async fn test_summarized_delete_document_by_id() {
    @r#"
    {
      "uid": 0,
+     "progress": null,
      "details": {
        "providedIds": 1,
        "deletedDocuments": 0

@@ -547,6 +555,7 @@ async fn test_summarized_delete_document_by_id() {
    @r#"
    {
      "uid": 2,
+     "progress": null,
      "details": {
        "providedIds": 1,
        "deletedDocuments": 0

@@ -594,6 +603,7 @@ async fn test_summarized_settings_update() {
    @r#"
    {
      "uid": 0,
+     "progress": null,
      "details": {
        "displayedAttributes": [
          "doggos",

@@ -638,6 +648,7 @@ async fn test_summarized_index_creation() {
    @r#"
    {
      "uid": 0,
+     "progress": null,
      "details": {},
      "stats": {
        "totalNbTasks": 1,

@@ -665,6 +676,7 @@ async fn test_summarized_index_creation() {
    @r#"
    {
      "uid": 1,
+     "progress": null,
      "details": {
        "primaryKey": "doggos"
      },

@@ -809,6 +821,7 @@ async fn test_summarized_index_update() {
    @r#"
    {
      "uid": 0,
+     "progress": null,
      "details": {},
      "stats": {
        "totalNbTasks": 1,

@@ -836,6 +849,7 @@ async fn test_summarized_index_update() {
    @r#"
    {
      "uid": 1,
+     "progress": null,
      "details": {
        "primaryKey": "bones"
      },

@@ -868,6 +882,7 @@ async fn test_summarized_index_update() {
    @r#"
    {
      "uid": 3,
+     "progress": null,
      "details": {},
      "stats": {
        "totalNbTasks": 1,

@@ -895,6 +910,7 @@ async fn test_summarized_index_update() {
    @r#"
    {
      "uid": 4,
+     "progress": null,
      "details": {
        "primaryKey": "bones"
      },

@@ -932,6 +948,7 @@ async fn test_summarized_index_swap() {
    @r#"
    {
      "uid": 0,
+     "progress": null,
      "details": {
        "swaps": [
          {

@@ -972,6 +989,7 @@ async fn test_summarized_index_swap() {
    @r#"
    {
      "uid": 3,
+     "progress": null,
      "details": {
        "swaps": [
          {

@@ -1014,6 +1032,7 @@ async fn test_summarized_batch_cancelation() {
    @r#"
    {
      "uid": 1,
+     "progress": null,
      "details": {
        "matchedTasks": 1,
        "canceledTasks": 0,

@@ -1051,6 +1070,7 @@ async fn test_summarized_batch_deletion() {
    @r#"
    {
      "uid": 1,
+     "progress": null,
      "details": {
        "matchedTasks": 1,
        "deletedTasks": 1,

@@ -1084,6 +1104,7 @@ async fn test_summarized_dump_creation() {
    @r#"
    {
      "uid": 0,
+     "progress": null,
      "details": {
        "dumpUid": "[dumpUid]"
      },

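All of the batch snapshots above change for the same reason: batch objects now expose a `progress` field, which is `null` once a batch has finished. An illustrative excerpt of the shape the updated snapshots assert, in `json!` form:

    // The new field sits between `uid` and `details`; only finished batches
    // are snapshotted in these tests, so `progress` is always null here.
    let expected = json!({
        "uid": 0,
        "progress": null,
        "details": { "receivedDocuments": 1, "indexedDocuments": 1 },
    });
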
@@ -52,6 +52,25 @@ impl Value {
        }
        self
    }

+   /// Return `true` if the `status` field is set to `failed`.
+   /// Panic if the `status` field doesn't exist.
+   #[track_caller]
+   pub fn is_fail(&self) -> bool {
+       if !self["status"].is_string() {
+           panic!("Called `is_fail` on {}", serde_json::to_string_pretty(&self.0).unwrap());
+       }
+       self["status"] == serde_json::Value::String(String::from("failed"))
+   }
+
+   // Panic if the json doesn't contain the `status` field set to "failed"
+   #[track_caller]
+   pub fn failed(&self) -> &Self {
+       if !self.is_fail() {
+           panic!("Called failed on {}", serde_json::to_string_pretty(&self.0).unwrap());
+       }
+       self
+   }
}
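A usage sketch for the new helper, symmetrical to the existing `succeeded()` (the `server` and `task` bindings are assumed from a surrounding test):

    // Wait for a task, then assert it failed; this panics with the full JSON
    // payload if the status is anything other than "failed".
    let task = server.wait_task(task.uid()).await;
    task.failed();
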

impl From<serde_json::Value> for Value {

@@ -1220,9 +1220,89 @@ async fn replace_document()
#[actix_rt::test]
async fn add_no_documents() {
    let server = Server::new().await;
-   let index = server.index("test");
-   let (_response, code) = index.add_documents(json!([]), None).await;
+   let index = server.index("kefir");
+   let (task, code) = index.add_documents(json!([]), None).await;
    snapshot!(code, @"202 Accepted");
+   let task = server.wait_task(task.uid()).await;
+   let task = task.succeeded();
+   snapshot!(task, @r#"
+   {
+     "uid": "[uid]",
+     "batchUid": "[batch_uid]",
+     "indexUid": "kefir",
+     "status": "succeeded",
+     "type": "documentAdditionOrUpdate",
+     "canceledBy": null,
+     "details": {
+       "receivedDocuments": 0,
+       "indexedDocuments": 0
+     },
+     "error": null,
+     "duration": "[duration]",
+     "enqueuedAt": "[date]",
+     "startedAt": "[date]",
+     "finishedAt": "[date]"
+   }
+   "#);
+
+   let (task, _code) = index.add_documents(json!([]), Some("kefkef")).await;
+   let task = server.wait_task(task.uid()).await;
+   let task = task.succeeded();
+   snapshot!(task, @r#"
+   {
+     "uid": "[uid]",
+     "batchUid": "[batch_uid]",
+     "indexUid": "kefir",
+     "status": "succeeded",
+     "type": "documentAdditionOrUpdate",
+     "canceledBy": null,
+     "details": {
+       "receivedDocuments": 0,
+       "indexedDocuments": 0
+     },
+     "error": null,
+     "duration": "[duration]",
+     "enqueuedAt": "[date]",
+     "startedAt": "[date]",
+     "finishedAt": "[date]"
+   }
+   "#);
+
+   let (task, _code) = index.add_documents(json!([{ "kefkef": 1 }]), None).await;
+   let task = server.wait_task(task.uid()).await;
+   let task = task.succeeded();
+   snapshot!(task, @r#"
+   {
+     "uid": "[uid]",
+     "batchUid": "[batch_uid]",
+     "indexUid": "kefir",
+     "status": "succeeded",
+     "type": "documentAdditionOrUpdate",
+     "canceledBy": null,
+     "details": {
+       "receivedDocuments": 1,
+       "indexedDocuments": 1
+     },
+     "error": null,
+     "duration": "[duration]",
+     "enqueuedAt": "[date]",
+     "startedAt": "[date]",
+     "finishedAt": "[date]"
+   }
+   "#);
+   let (documents, _status) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
+   snapshot!(documents, @r#"
+   {
+     "results": [
+       {
+         "kefkef": 1
+       }
+     ],
+     "offset": 0,
+     "limit": 20,
+     "total": 1
+   }
+   "#);
}

#[actix_rt::test]

@@ -1264,15 +1344,18 @@ async fn error_add_documents_bad_document_id() {
    let server = Server::new().await;
    let index = server.index("test");
    index.create(Some("docid")).await;

    // unsupported characters

    let documents = json!([
        {
            "docid": "foo & bar",
            "content": "foobar"
        }
    ]);
-   index.add_documents(documents, None).await;
-   index.wait_task(1).await;
-   let (response, code) = index.get_task(1).await;
+   let (value, _code) = index.add_documents(documents, None).await;
+   index.wait_task(value.uid()).await;
+   let (response, code) = index.get_task(value.uid()).await;
    snapshot!(code, @"200 OK");
    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
    @r###"

@@ -1288,7 +1371,81 @@ async fn error_add_documents_bad_document_id() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
+     "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
      "code": "invalid_document_id",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_id"
    },
    "duration": "[duration]",
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]"
    }
    "###);

+   // More than 512 bytes
+   let documents = json!([
+       {
+           "docid": "a".repeat(600),
+           "content": "foobar"
+       }
+   ]);
+   let (value, _code) = index.add_documents(documents, None).await;
+   index.wait_task(value.uid()).await;
+   let (response, code) = index.get_task(value.uid()).await;
+   snapshot!(code, @"200 OK");
+   snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
+   @r###"
+   {
+     "uid": 2,
+     "batchUid": 2,
+     "indexUid": "test",
+     "status": "failed",
+     "type": "documentAdditionOrUpdate",
+     "canceledBy": null,
+     "details": {
+       "receivedDocuments": 1,
+       "indexedDocuments": 0
+     },
+     "error": {
+       "message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
+       "code": "invalid_document_id",
+       "type": "invalid_request",
+       "link": "https://docs.meilisearch.com/errors#invalid_document_id"
+     },
+     "duration": "[duration]",
+     "enqueuedAt": "[date]",
+     "startedAt": "[date]",
+     "finishedAt": "[date]"
+   }
+   "###);
+
+   // Exactly 512 bytes
+   let documents = json!([
+       {
+           "docid": "a".repeat(512),
+           "content": "foobar"
+       }
+   ]);
+   let (value, _code) = index.add_documents(documents, None).await;
+   index.wait_task(value.uid()).await;
+   let (response, code) = index.get_task(value.uid()).await;
+   snapshot!(code, @"200 OK");
+   snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
+   @r###"
+   {
+     "uid": 3,
+     "batchUid": 3,
+     "indexUid": "test",
+     "status": "failed",
+     "type": "documentAdditionOrUpdate",
+     "canceledBy": null,
+     "details": {
+       "receivedDocuments": 1,
+       "indexedDocuments": 0
+     },
+     "error": {
+       "message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
+       "code": "invalid_document_id",
+       "type": "invalid_request",
+       "link": "https://docs.meilisearch.com/errors#invalid_document_id"
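The message change above is not purely cosmetic: the accepted identifier length drops from 512 to 511 bytes, and the two added cases pin the boundary (600 bytes fails, and exactly 512 bytes now fails as well). In effect, the tests pin down this predicate (a sketch, not the actual validation code; acceptance at exactly 511 bytes is implied by the message rather than tested here):

    fn id_len_ok(docid: &str) -> bool {
        docid.len() <= 511 // 512-byte identifiers are now rejected
    }
    assert!(!id_len_ok(&"a".repeat(512)));
    assert!(!id_len_ok(&"a".repeat(600)));
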
@@ -1681,7 +1838,7 @@ async fn add_documents_invalid_geo_field(
      "indexedDocuments": 0
    },
    "error": {
-     "message": "The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
+     "message": "Index `test`: The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -1719,7 +1876,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
+     "message": "Index `test`: Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -1757,7 +1914,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
+     "message": "Index `test`: Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -1795,7 +1952,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
+     "message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -1833,7 +1990,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
+     "message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -1871,7 +2028,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
+     "message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -1909,7 +2066,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
+     "message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -1947,7 +2104,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.",
+     "message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -1985,7 +2142,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
+     "message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -2023,7 +2180,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
+     "message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -2061,7 +2218,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
+     "message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -2099,7 +2256,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
+     "message": "Index `test`: The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -2138,7 +2295,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
+     "message": "Index `test`: Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -2175,7 +2332,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
+     "message": "Index `test`: Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -2212,7 +2369,7 @@ async fn add_documents_invalid_geo_field() {
      "indexedDocuments": 0
    },
    "error": {
-     "message": "Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.",
+     "message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -2279,7 +2436,7 @@ async fn add_invalid_geo_and_then_settings() {
      ]
    },
    "error": {
-     "message": "Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
+     "message": "Index `test`: Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
      "code": "invalid_document_geo_field",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

@@ -604,7 +604,7 @@ async fn delete_document_by_filter() {
      "originalFilter": "\"doggo = bernese\""
    },
    "error": {
-     "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
+     "message": "Index `EMPTY_INDEX`: Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
      "code": "invalid_document_filter",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"

@@ -636,7 +636,7 @@ async fn delete_document_by_filter() {
      "originalFilter": "\"catto = jorts\""
    },
    "error": {
-     "message": "Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts",
+     "message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts",
      "code": "invalid_document_filter",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"

@@ -172,7 +172,7 @@ async fn error_update_documents_bad_document_id() {
    assert_eq!(
        response["error"]["message"],
        json!(
-           r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."#
+           r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes."#
        )
    );
    assert_eq!(response["error"]["code"], json!("invalid_document_id"));

@@ -95,7 +95,7 @@ async fn error_update_existing_primary_key() {
    let response = index.wait_task(2).await;

    let expected_response = json!({
-       "message": "Index already has a primary key: `id`.",
+       "message": "Index `test`: Index already has a primary key: `id`.",
        "code": "index_primary_key_already_exists",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#index_primary_key_already_exists"

@@ -711,7 +711,7 @@ async fn filter_invalid_attribute_array() {
    index.wait_task(task.uid()).await;

    let expected_response = json!({
-       "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
+       "message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid),
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"

@@ -733,7 +733,7 @@ async fn filter_invalid_attribute_string() {
    index.wait_task(task.uid()).await;

    let expected_response = json!({
-       "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
+       "message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid),
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"

@@ -940,7 +940,7 @@ async fn sort_unsortable_attribute() {
    index.wait_task(response.uid()).await.succeeded();

    let expected_response = json!({
-       "message": "Attribute `title` is not sortable. Available sortable attributes are: `id`.",
+       "message": format!("Index `{}`: Attribute `title` is not sortable. Available sortable attributes are: `id`.", index.uid),
        "code": "invalid_search_sort",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_sort"

@@ -998,7 +998,7 @@ async fn sort_unset_ranking_rule() {
    index.wait_task(response.uid()).await.succeeded();

    let expected_response = json!({
-       "message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.",
+       "message": format!("Index `{}`: You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.", index.uid),
        "code": "invalid_search_sort",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_sort"

@@ -1024,19 +1023,18 @@ async fn search_on_unknown_field() {
    index.update_settings_searchable_attributes(json!(["id", "title"])).await;
    index.wait_task(response.uid()).await.succeeded();

+   let expected_response = json!({
+       "message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid),
+       "code": "invalid_search_attributes_to_search_on",
+       "type": "invalid_request",
+       "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
+   });
    index
        .search(
            json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
            |response, code| {
-               snapshot!(code, @"400 Bad Request");
-               snapshot!(json_string!(response), @r###"
-               {
-                 "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
-                 "code": "invalid_search_attributes_to_search_on",
-                 "type": "invalid_request",
-                 "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
-               }
-               "###);
+               assert_eq!(response, expected_response);
+               assert_eq!(code, 400);
            },
        )
        .await;

@@ -1050,19 +1049,18 @@ async fn search_on_unknown_field_plus_joker() {
    index.update_settings_searchable_attributes(json!(["id", "title"])).await;
    index.wait_task(response.uid()).await.succeeded();

+   let expected_response = json!({
+       "message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid),
+       "code": "invalid_search_attributes_to_search_on",
+       "type": "invalid_request",
+       "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
+   });
    index
        .search(
            json!({"q": "Captain Marvel", "attributesToSearchOn": ["*", "unknown"]}),
            |response, code| {
-               snapshot!(code, @"400 Bad Request");
-               snapshot!(json_string!(response), @r###"
-               {
-                 "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
-                 "code": "invalid_search_attributes_to_search_on",
-                 "type": "invalid_request",
-                 "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
-               }
-               "###);
+               assert_eq!(response, expected_response);
+               assert_eq!(code, 400);
            },
        )
        .await;

@@ -1071,15 +1069,8 @@ async fn search_on_unknown_field_plus_joker() {
        .search(
            json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown", "*"]}),
            |response, code| {
-               snapshot!(code, @"400 Bad Request");
-               snapshot!(json_string!(response), @r###"
-               {
-                 "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
-                 "code": "invalid_search_attributes_to_search_on",
-                 "type": "invalid_request",
-                 "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
-               }
-               "###);
+               assert_eq!(response, expected_response);
+               assert_eq!(code, 400);
            },
        )
        .await;

@@ -1092,47 +1083,44 @@ async fn distinct_at_search_time() {
    let (task, _) = index.create(None).await;
    index.wait_task(task.uid()).await.succeeded();

+   let expected_response = json!({
+       "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.", index.uid),
+       "code": "invalid_search_distinct",
+       "type": "invalid_request",
+       "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
+   });
    let (response, code) =
        index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
-   snapshot!(code, @"400 Bad Request");
-   snapshot!(response, @r###"
-   {
-     "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.",
-     "code": "invalid_search_distinct",
-     "type": "invalid_request",
-     "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
-   }
-   "###);
+   assert_eq!(response, expected_response);
+   assert_eq!(code, 400);

    let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await;
    index.wait_task(task.uid()).await;

+   let expected_response = json!({
+       "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", index.uid),
+       "code": "invalid_search_distinct",
+       "type": "invalid_request",
+       "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
+   });
    let (response, code) =
        index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
-   snapshot!(code, @"400 Bad Request");
-   snapshot!(response, @r###"
-   {
-     "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.",
-     "code": "invalid_search_distinct",
-     "type": "invalid_request",
-     "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
-   }
-   "###);
+   assert_eq!(response, expected_response);
+   assert_eq!(code, 400);

    let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await;
    index.wait_task(task.uid()).await;

+   let expected_response = json!({
+       "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", index.uid),
+       "code": "invalid_search_distinct",
+       "type": "invalid_request",
+       "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
+   });
    let (response, code) =
        index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
-   snapshot!(code, @"400 Bad Request");
-   snapshot!(response, @r###"
-   {
-     "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.",
-     "code": "invalid_search_distinct",
-     "type": "invalid_request",
-     "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
-   }
-   "###);
+   assert_eq!(response, expected_response);
+   assert_eq!(code, 400);

    let (response, code) =
        index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await;

@@ -57,6 +57,116 @@ async fn simple_facet_search() {
    assert_eq!(response["facetHits"].as_array().unwrap().len(), 1);
}

+ #[actix_rt::test]
+ async fn simple_facet_search_on_movies() {
+     let server = Server::new().await;
+     let index = server.index("test");
+
+     let documents = json!([
+         {
+             "id": 1,
+             "title": "Carol",
+             "genres": ["Romance", "Drama"],
+             "color": ["red"],
+             "platforms": ["MacOS", "Linux", "Windows"]
+         },
+         {
+             "id": 2,
+             "title": "Wonder Woman",
+             "genres": ["Action", "Adventure"],
+             "color": ["green"],
+             "platforms": ["MacOS"]
+         },
+         {
+             "id": 3,
+             "title": "Life of Pi",
+             "genres": ["Adventure", "Drama"],
+             "color": ["blue"],
+             "platforms": ["Windows"]
+         },
+         {
+             "id": 4,
+             "title": "Mad Max: Fury Road",
+             "genres": ["Adventure", "Science Fiction"],
+             "color": ["red"],
+             "platforms": ["MacOS", "Linux"]
+         },
+         {
+             "id": 5,
+             "title": "Moana",
+             "genres": ["Fantasy", "Action"],
+             "color": ["red"],
+             "platforms": ["Windows"]
+         },
+         {
+             "id": 6,
+             "title": "Philadelphia",
+             "genres": ["Drama"],
+             "color": ["blue"],
+             "platforms": ["MacOS", "Linux", "Windows"]
+         }
+     ]);
+     let (response, code) =
+         index.update_settings_filterable_attributes(json!(["genres", "color"])).await;
+     assert_eq!(202, code, "{:?}", response);
+     index.wait_task(response.uid()).await;
+
+     let (response, _code) = index.add_documents(documents, None).await;
+     index.wait_task(response.uid()).await;
+
+     let (response, code) =
+         index.facet_search(json!({"facetQuery": "", "facetName": "genres", "q": "" })).await;
+
+     assert_eq!(code, 200, "{}", response);
+     snapshot!(response["facetHits"], @r###"[{"value":"Action","count":2},{"value":"Adventure","count":3},{"value":"Drama","count":3},{"value":"Fantasy","count":1},{"value":"Romance","count":1},{"value":"Science Fiction","count":1}]"###);
+ }

#[actix_rt::test]
async fn advanced_facet_search() {
    let server = Server::new().await;

@@ -221,8 +331,15 @@ async fn add_documents_and_deactivate_facet_search() {
    let (response, code) =
        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;

-   assert_eq!(code, 200, "{}", response);
-   assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
+   assert_eq!(code, 400, "{}", response);
+   snapshot!(response, @r###"
+   {
+     "message": "The facet search is disabled for this index",
+     "code": "facet_search_disabled",
+     "type": "invalid_request",
+     "link": "https://docs.meilisearch.com/errors#facet_search_disabled"
+   }
+   "###);
}

#[actix_rt::test]

@@ -245,8 +362,15 @@ async fn deactivate_facet_search_and_add_documents() {
    let (response, code) =
        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;

-   assert_eq!(code, 200, "{}", response);
-   assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
+   assert_eq!(code, 400, "{}", response);
+   snapshot!(response, @r###"
+   {
+     "message": "The facet search is disabled for this index",
+     "code": "facet_search_disabled",
+     "type": "invalid_request",
+     "link": "https://docs.meilisearch.com/errors#facet_search_disabled"
+   }
+   "###);
}

#[actix_rt::test]

@@ -1746,3 +1746,57 @@ async fn change_attributes_settings() {
        )
        .await;
}

+ /// Modifying facets with different casing should work correctly
+ #[actix_rt::test]
+ async fn change_facet_casing() {
+     let server = Server::new().await;
+     let index = server.index("test");
+
+     let (response, code) = index
+         .update_settings(json!({
+             "filterableAttributes": ["dog"],
+         }))
+         .await;
+     assert_eq!("202", code.as_str(), "{:?}", response);
+     index.wait_task(response.uid()).await;
+
+     let (response, _code) = index
+         .add_documents(
+             json!([
+                 {
+                     "id": 1,
+                     "dog": "Bouvier Bernois"
+                 }
+             ]),
+             None,
+         )
+         .await;
+     index.wait_task(response.uid()).await;
+
+     let (response, _code) = index
+         .add_documents(
+             json!([
+                 {
+                     "id": 1,
+                     "dog": "bouvier bernois"
+                 }
+             ]),
+             None,
+         )
+         .await;
+     index.wait_task(response.uid()).await;
+
+     index
+         .search(json!({ "facets": ["dog"] }), |response, code| {
+             meili_snap::snapshot!(code, @"200 OK");
+             meili_snap::snapshot!(meili_snap::json_string!(response["facetDistribution"]), @r###"
+             {
+               "dog": {
+                 "bouvier bernois": 1
+               }
+             }
+             "###);
+         })
+         .await;
+ }

@@ -1070,7 +1070,7 @@ async fn federation_one_query_error() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-     "message": "Inside `.queries[1]`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
+     "message": "Inside `.queries[1]`: Index `nested`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
      "code": "invalid_search_filter",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"

@@ -1102,7 +1102,7 @@ async fn federation_one_query_sort_error() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-     "message": "Inside `.queries[1]`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
+     "message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
      "code": "invalid_search_sort",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_sort"

@@ -1166,7 +1166,7 @@ async fn federation_multiple_query_errors() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-     "message": "Inside `.queries[0]`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
+     "message": "Inside `.queries[0]`: Index `test`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
      "code": "invalid_search_filter",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"

@@ -1198,7 +1198,7 @@ async fn federation_multiple_query_sort_errors() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-     "message": "Inside `.queries[0]`: Attribute `title` is not sortable. This index does not have configured sortable attributes.",
+     "message": "Inside `.queries[0]`: Index `test`: Attribute `title` is not sortable. This index does not have configured sortable attributes.",
      "code": "invalid_search_sort",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_sort"

@@ -1231,7 +1231,7 @@ async fn federation_multiple_query_errors_interleaved() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-     "message": "Inside `.queries[1]`: Attribute `doggos` is not filterable. This index does not have configured filterable attributes.\n1:7 doggos IN [intel, kefir]",
+     "message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not filterable. This index does not have configured filterable attributes.\n1:7 doggos IN [intel, kefir]",
      "code": "invalid_search_filter",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"

@@ -1264,7 +1264,7 @@ async fn federation_multiple_query_sort_errors_interleaved() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-     "message": "Inside `.queries[1]`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
+     "message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
      "code": "invalid_search_sort",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_sort"

@@ -1,44 +1,185 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use crate::common::{Server, Value};
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
|
||||
static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
map.insert("displayed_attributes", json!(["*"]));
|
||||
map.insert("searchable_attributes", json!(["*"]));
|
||||
map.insert("localized_attributes", json!(null));
|
||||
map.insert("filterable_attributes", json!([]));
|
||||
map.insert("distinct_attribute", json!(null));
|
||||
map.insert(
|
||||
"ranking_rules",
|
||||
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
|
||||
);
|
||||
map.insert("stop_words", json!([]));
|
||||
map.insert("non_separator_tokens", json!([]));
|
||||
map.insert("separator_tokens", json!([]));
|
||||
map.insert("dictionary", json!([]));
|
||||
map.insert("synonyms", json!({}));
|
||||
map.insert(
|
||||
"faceting",
|
||||
json!({
|
||||
"maxValuesPerFacet": json!(100),
|
||||
"sortFacetValuesBy": {
|
||||
"*": "alpha"
|
||||
macro_rules! test_setting_routes {
|
||||
($({setting: $setting:ident, update_verb: $update_verb:ident, default_value: $default_value:tt},) *) => {
|
||||
$(
|
||||
mod $setting {
|
||||
use crate::common::Server;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let url = format!("/indexes/test/settings/{}",
|
||||
stringify!($setting)
|
||||
.chars()
|
||||
.map(|c| if c == '_' { '-' } else { c })
|
||||
.collect::<String>());
|
||||
let (_response, code) = server.service.get(url).await;
|
||||
assert_eq!(code, 404);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn update_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let url = format!("/indexes/test/settings/{}",
|
||||
stringify!($setting)
|
||||
.chars()
|
||||
.map(|c| if c == '_' { '-' } else { c })
|
||||
.collect::<String>());
|
||||
let (response, code) = server.service.$update_verb(url, serde_json::Value::Null.into()).await;
|
||||
assert_eq!(code, 202, "{}", response);
|
||||
server.index("").wait_task(0).await;
|
||||
let (response, code) = server.index("test").get().await;
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn delete_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let url = format!("/indexes/test/settings/{}",
|
||||
stringify!($setting)
|
||||
.chars()
|
||||
.map(|c| if c == '_' { '-' } else { c })
|
||||
.collect::<String>());
|
||||
let (_, code) = server.service.delete(url).await;
|
||||
assert_eq!(code, 202);
|
||||
let response = server.index("").wait_task(0).await;
|
||||
assert_eq!(response["status"], "failed");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_default() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let (response, code) = index.create(None).await;
|
||||
assert_eq!(code, 202, "{}", response);
|
||||
index.wait_task(0).await;
|
||||
let url = format!("/indexes/test/settings/{}",
|
||||
stringify!($setting)
|
||||
.chars()
|
||||
.map(|c| if c == '_' { '-' } else { c })
|
||||
.collect::<String>());
|
||||
let (response, code) = server.service.get(url).await;
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
let expected = crate::json!($default_value);
|
||||
assert_eq!(expected, response);
|
||||
}
|
||||
}
|
||||
}),
|
||||
);
|
||||
map.insert(
|
||||
"pagination",
|
||||
json!({
|
||||
"maxTotalHits": json!(1000),
|
||||
}),
|
||||
);
|
||||
map.insert("search_cutoff_ms", json!(null));
|
||||
map
|
||||
});
|
||||
)*
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn all_setting_tested() {
|
||||
let expected = std::collections::BTreeSet::from_iter(meilisearch::routes::indexes::settings::ALL_SETTINGS_NAMES.iter());
|
||||
let tested = std::collections::BTreeSet::from_iter([$(stringify!($setting)),*].iter());
|
||||
let diff: Vec<_> = expected.difference(&tested).collect();
|
||||
assert!(diff.is_empty(), "Not all settings were tested, please add the following settings to the `test_setting_routes!` macro: {:?}", diff);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test_setting_routes!(
    {
        setting: filterable_attributes,
        update_verb: put,
        default_value: []
    },
    {
        setting: displayed_attributes,
        update_verb: put,
        default_value: ["*"]
    },
    {
        setting: localized_attributes,
        update_verb: put,
        default_value: null
    },
    {
        setting: searchable_attributes,
        update_verb: put,
        default_value: ["*"]
    },
    {
        setting: distinct_attribute,
        update_verb: put,
        default_value: null
    },
    {
        setting: stop_words,
        update_verb: put,
        default_value: []
    },
    {
        setting: separator_tokens,
        update_verb: put,
        default_value: []
    },
    {
        setting: non_separator_tokens,
        update_verb: put,
        default_value: []
    },
    {
        setting: dictionary,
        update_verb: put,
        default_value: []
    },
    {
        setting: ranking_rules,
        update_verb: put,
        default_value: ["words", "typo", "proximity", "attribute", "sort", "exactness"]
    },
    {
        setting: synonyms,
        update_verb: put,
        default_value: {}
    },
    {
        setting: pagination,
        update_verb: patch,
        default_value: {"maxTotalHits": 1000}
    },
    {
        setting: faceting,
        update_verb: patch,
        default_value: {"maxValuesPerFacet": 100, "sortFacetValuesBy": {"*": "alpha"}}
    },
    {
        setting: search_cutoff_ms,
        update_verb: put,
        default_value: null
    },
    {
        setting: embedders,
        update_verb: patch,
        default_value: null
    },
    {
        setting: facet_search,
        update_verb: put,
        default_value: true
    },
    {
        setting: prefix_search,
        update_verb: put,
        default_value: "indexingTime"
    },
    {
        setting: proximity_precision,
        update_verb: put,
        default_value: "byWord"
    },
    {
        setting: sortable_attributes,
        update_verb: put,
        default_value: []
    },
    {
        setting: typo_tolerance,
        update_verb: patch,
        default_value: {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": []}
    },
);
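
// The macro above derives each settings route from the setting identifier by
// swapping underscores for hyphens; a minimal sketch of the conversion it generates:
//
//     let route = stringify!(search_cutoff_ms)
//         .chars()
//         .map(|c| if c == '_' { '-' } else { c })
//         .collect::<String>();
//     assert_eq!(route, "search-cutoff-ms"); // -> /indexes/test/settings/search-cutoff-ms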

#[actix_rt::test]
async fn get_settings_unexisting_index() {
@@ -342,93 +483,6 @@ async fn error_update_setting_unexisting_index_invalid_uid() {
    "###);
}

macro_rules! test_setting_routes {
    ($($setting:ident $write_method:ident), *) => {
        $(
            mod $setting {
                use crate::common::Server;
                use super::DEFAULT_SETTINGS_VALUES;

                #[actix_rt::test]
                async fn get_unexisting_index() {
                    let server = Server::new().await;
                    let url = format!("/indexes/test/settings/{}",
                        stringify!($setting)
                            .chars()
                            .map(|c| if c == '_' { '-' } else { c })
                            .collect::<String>());
                    let (_response, code) = server.service.get(url).await;
                    assert_eq!(code, 404);
                }

                #[actix_rt::test]
                async fn update_unexisting_index() {
                    let server = Server::new().await;
                    let url = format!("/indexes/test/settings/{}",
                        stringify!($setting)
                            .chars()
                            .map(|c| if c == '_' { '-' } else { c })
                            .collect::<String>());
                    let (response, code) = server.service.$write_method(url, serde_json::Value::Null.into()).await;
                    assert_eq!(code, 202, "{}", response);
                    server.index("").wait_task(0).await;
                    let (response, code) = server.index("test").get().await;
                    assert_eq!(code, 200, "{}", response);
                }

                #[actix_rt::test]
                async fn delete_unexisting_index() {
                    let server = Server::new().await;
                    let url = format!("/indexes/test/settings/{}",
                        stringify!($setting)
                            .chars()
                            .map(|c| if c == '_' { '-' } else { c })
                            .collect::<String>());
                    let (_, code) = server.service.delete(url).await;
                    assert_eq!(code, 202);
                    let response = server.index("").wait_task(0).await;
                    assert_eq!(response["status"], "failed");
                }

                #[actix_rt::test]
                async fn get_default() {
                    let server = Server::new().await;
                    let index = server.index("test");
                    let (response, code) = index.create(None).await;
                    assert_eq!(code, 202, "{}", response);
                    index.wait_task(0).await;
                    let url = format!("/indexes/test/settings/{}",
                        stringify!($setting)
                            .chars()
                            .map(|c| if c == '_' { '-' } else { c })
                            .collect::<String>());
                    let (response, code) = server.service.get(url).await;
                    assert_eq!(code, 200, "{}", response);
                    let expected = DEFAULT_SETTINGS_VALUES.get(stringify!($setting)).unwrap();
                    assert_eq!(expected, &response);
                }
            }
        )*
    };
}

test_setting_routes!(
    filterable_attributes put,
    displayed_attributes put,
    localized_attributes put,
    searchable_attributes put,
    distinct_attribute put,
    stop_words put,
    separator_tokens put,
    non_separator_tokens put,
    dictionary put,
    ranking_rules put,
    synonyms put,
    pagination patch,
    faceting patch,
    search_cutoff_ms put
);

#[actix_rt::test]
async fn error_set_invalid_ranking_rules() {
    let server = Server::new().await;

@@ -79,7 +79,7 @@ async fn similar_bad_id() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
      "code": "invalid_similar_id",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_similar_id"
@@ -172,7 +172,7 @@ async fn similar_invalid_id() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
      "code": "invalid_similar_id",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_similar_id"

@@ -129,11 +129,11 @@ async fn perform_on_demand_snapshot() {

    index.load_test_set().await;

    server.index("doggo").create(Some("bone")).await;
    index.wait_task(2).await;
    let (task, _) = server.index("doggo").create(Some("bone")).await;
    index.wait_task(task.uid()).await.succeeded();

    server.index("doggo").create(Some("bone")).await;
    index.wait_task(2).await;
    let (task, _) = server.index("doggo").create(Some("bone")).await;
    index.wait_task(task.uid()).await.failed();

    let (task, code) = server.create_snapshot().await;
    snapshot!(code, @"202 Accepted");

@@ -448,7 +448,7 @@ async fn test_summarized_delete_documents_by_filter() {
        "originalFilter": "\"doggo = bernese\""
      },
      "error": {
        "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
        "message": "Index `test`: Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
        "code": "invalid_document_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_document_filter"

@@ -318,7 +318,7 @@ async fn try_to_disable_binary_quantization() {
        }
      },
      "error": {
        "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
        "message": "Index `doggo`: `.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
        "code": "invalid_settings_embedders",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"

@@ -250,7 +250,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
        "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@@ -280,7 +280,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
        "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@@ -311,7 +311,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "Bad embedder configuration in the document with id: `0`. Could not parse `._vectors.manual.regenerate`: invalid type: string \"yes please\", expected a boolean at line 1 column 26",
        "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Could not parse `._vectors.manual.regenerate`: invalid type: string \"yes please\", expected a boolean at line 1 column 26",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@@ -340,7 +340,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings`: expected null or an array, but found a boolean: `true`",
        "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings`: expected null or an array, but found a boolean: `true`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@@ -369,7 +369,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
        "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@@ -398,7 +398,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
        "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@@ -440,7 +440,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
        "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@@ -469,7 +469,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
        "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@@ -498,7 +498,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
        "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@@ -539,7 +539,7 @@ async fn user_provided_vectors_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "While embedding documents for embedder `manual`: no vectors provided for document `40` and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
        "message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `40` and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -569,7 +569,7 @@ async fn user_provided_vectors_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
        "message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -599,7 +599,7 @@ async fn user_provided_vectors_error() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
        "message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"

@@ -713,7 +713,7 @@ async fn bad_api_key() {
        }
      },
      "error": {
        "message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
        "message": "Index `doggo`: While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -757,7 +757,7 @@ async fn bad_api_key() {
        }
      },
      "error": {
        "message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
        "message": "Index `doggo`: While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"

@@ -985,7 +985,7 @@ async fn bad_settings() {
        }
      },
      "error": {
        "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting a single \"{{embedding}}\", expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
        "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting a single \"{{embedding}}\", expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1025,7 +1025,7 @@ async fn bad_settings() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`",
        "message": "Index `doggo`: While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1178,7 +1178,7 @@ async fn server_returns_bad_request() {
        }
      },
      "error": {
        "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"test\\\", expected struct MultipleRequest at line 1 column 6\"}`",
        "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"test\\\", expected struct MultipleRequest at line 1 column 6\"}`",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1247,7 +1247,7 @@ async fn server_returns_bad_request() {
        "indexedDocuments": 0
      },
      "error": {
        "message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`",
        "message": "Index `doggo`: While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1306,7 +1306,7 @@ async fn server_returns_bad_response() {
        }
      },
      "error": {
        "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting the array of \"{{embedding}}\"s, configuration expects `response` to be an array with at least 1 item(s) but server sent an object with 1 field(s)",
        "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting the array of \"{{embedding}}\"s, configuration expects `response` to be an array with at least 1 item(s) but server sent an object with 1 field(s)",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1362,7 +1362,7 @@ async fn server_returns_bad_response() {
        }
      },
      "error": {
        "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting item #0 from the array of \"{{embedding}}\"s, expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
        "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting item #0 from the array of \"{{embedding}}\"s, expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1414,7 +1414,7 @@ async fn server_returns_bad_response() {
        }
      },
      "error": {
        "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output`, while extracting a single \"{{embedding}}\", expected `output` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected f32",
        "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output`, while extracting a single \"{{embedding}}\", expected `output` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected f32",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1478,7 +1478,7 @@ async fn server_returns_bad_response() {
        }
      },
      "error": {
        "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.embedding`, while extracting item #0 from the array of \"{{embedding}}\"s, configuration expects `embedding` to be an object with key `data` but server sent an array of size 3",
        "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.embedding`, while extracting item #0 from the array of \"{{embedding}}\"s, configuration expects `embedding` to be an object with key `data` but server sent an array of size 3",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1542,7 +1542,7 @@ async fn server_returns_bad_response() {
        }
      },
      "error": {
        "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output[0]`, while extracting a single \"{{embedding}}\", configuration expects key \"embeddings\", which is missing in response\n - Hint: item #0 inside `output` has key `embedding`, did you mean `response.output[0].embedding` in embedder configuration?",
        "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output[0]`, while extracting a single \"{{embedding}}\", configuration expects key \"embeddings\", which is missing in response\n - Hint: item #0 inside `output` has key `embedding`, did you mean `response.output[0].embedding` in embedder configuration?",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1908,7 +1908,7 @@ async fn server_custom_header() {
        }
      },
      "error": {
        "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
        "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1951,7 +1951,7 @@ async fn server_custom_header() {
        }
      },
      "error": {
        "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
        "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -2099,7 +2099,7 @@ async fn searchable_reindex() {
        ]
      },
      "error": {
        "message": "While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`",
        "message": "Index `doggo`: While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"

@@ -10,12 +10,15 @@ license.workspace = true

[dependencies]
anyhow = "1.0.86"
arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
clap = { version = "4.5.9", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
indexmap = { version = "2.7.0", features = ["serde"] }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
serde = { version = "1.0.209", features = ["derive"] }
serde_json = { version = "1.0.133", features = ["preserve_order"] }
tempfile = "3.14.0"
time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false }
arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }

@@ -73,7 +73,7 @@ enum Command {
    ///
    /// Supported upgrade paths:
    ///
    /// - v1.9.x -> v1.10.x -> v1.11.x
    /// - v1.9.x -> v1.10.x -> v1.11.x -> v1.12.x
    OfflineUpgrade {
        #[arg(long)]
        target_version: String,
@@ -88,7 +88,7 @@ fn main() -> anyhow::Result<()> {
    match command {
        Command::ClearTaskQueue => clear_task_queue(db_path),
        Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
            export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
            export_a_dump(db_path, dump_dir, skip_enqueued_tasks, detected_version)
        }
        Command::OfflineUpgrade { target_version } => {
            let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
@@ -187,6 +187,7 @@ fn export_a_dump(
    db_path: PathBuf,
    dump_dir: PathBuf,
    skip_enqueued_tasks: bool,
    detected_version: (String, String, String),
) -> Result<(), anyhow::Error> {
    let started_at = OffsetDateTime::now_utc();

@@ -238,9 +239,6 @@ fn export_a_dump(
    if skip_enqueued_tasks {
        eprintln!("Skip dumping the enqueued tasks...");
    } else {
        eprintln!("Dumping the enqueued tasks...");

        // 3. dump the tasks
        let mut dump_tasks = dump.create_tasks_queue()?;
        let mut count = 0;
        for ret in all_tasks.iter(&rtxn)? {
@@ -254,18 +252,39 @@ fn export_a_dump(
            if status == Status::Enqueued {
                let content_file = file_store.get_update(content_file_uuid)?;

                let reader =
                    DocumentsBatchReader::from_reader(content_file).with_context(|| {
                        format!("While reading content file {:?}", content_file_uuid)
                    })?;

                let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
                while let Some(doc) = cursor.next_document().with_context(|| {
                    format!("While iterating on content file {:?}", content_file_uuid)
                })? {
                    dump_content_file
                        .push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
                if (
                    detected_version.0.as_str(),
                    detected_version.1.as_str(),
                    detected_version.2.as_str(),
                ) < ("1", "12", "0")
                {
                    eprintln!("Dumping the enqueued tasks reading them in obkv format...");
                    let reader =
                        DocumentsBatchReader::from_reader(content_file).with_context(|| {
                            format!("While reading content file {:?}", content_file_uuid)
                        })?;
                    let (mut cursor, documents_batch_index) =
                        reader.into_cursor_and_fields_index();
                    while let Some(doc) = cursor.next_document().with_context(|| {
                        format!("While iterating on content file {:?}", content_file_uuid)
                    })? {
                        dump_content_file
                            .push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
                    }
                } else {
                    eprintln!(
                        "Dumping the enqueued tasks reading them in JSON stream format..."
                    );
                    for document in
                        serde_json::de::Deserializer::from_reader(content_file).into_iter()
                    {
                        let document = document.with_context(|| {
                            format!("While reading content file {:?}", content_file_uuid)
                        })?;
                        dump_content_file.push_document(&document)?;
                    }
                }

                dump_content_file.flush()?;
                count += 1;
            }

@@ -1,13 +1,14 @@
mod v1_10;
mod v1_11;
mod v1_12;
mod v1_9;

use std::path::{Path, PathBuf};

use anyhow::{bail, Context};
use meilisearch_types::versioning::create_version_file;

use v1_10::v1_9_to_v1_10;
use v1_12::{v1_11_to_v1_12, v1_12_to_v1_12_3};

use crate::upgrade::v1_11::v1_10_to_v1_11;

@@ -19,11 +20,48 @@ pub struct OfflineUpgrade {

impl OfflineUpgrade {
    pub fn upgrade(self) -> anyhow::Result<()> {
        // Adding a version?
        //
        // 1. Update the LAST_SUPPORTED_UPGRADE_FROM_VERSION and LAST_SUPPORTED_UPGRADE_TO_VERSION.
        // 2. Add new version to the upgrade list if necessary
        // 3. Use `no_upgrade` as index for versions that are compatible.
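        //
        // For example, 1.12.3 through 1.12.7 require no migration between one
        // another, so the match arms below map them all to `no_upgrade`.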

        if self.current_version == self.target_version {
            println!("Database is already at the target version. Exiting.");
            return Ok(());
        }

        if self.current_version > self.target_version {
            bail!(
                "Cannot downgrade from {}.{}.{} to {}.{}.{}. Downgrade not supported",
                self.current_version.0,
                self.current_version.1,
                self.current_version.2,
                self.target_version.0,
                self.target_version.1,
                self.target_version.2
            );
        }

        const FIRST_SUPPORTED_UPGRADE_FROM_VERSION: &str = "1.9.0";
        const LAST_SUPPORTED_UPGRADE_FROM_VERSION: &str = "1.12.7";
        const FIRST_SUPPORTED_UPGRADE_TO_VERSION: &str = "1.10.0";
        const LAST_SUPPORTED_UPGRADE_TO_VERSION: &str = "1.12.7";

        let upgrade_list = [
            (v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"),
            (
                v1_9_to_v1_10 as fn(&Path, &str, &str, &str) -> Result<(), anyhow::Error>,
                "1",
                "10",
                "0",
            ),
            (v1_10_to_v1_11, "1", "11", "0"),
            (v1_11_to_v1_12, "1", "12", "0"),
            (v1_12_to_v1_12_3, "1", "12", "3"),
        ];

        let no_upgrade: usize = upgrade_list.len();

        let (current_major, current_minor, current_patch) = &self.current_version;

        let start_at = match (
@@ -33,8 +71,13 @@ impl OfflineUpgrade {
        ) {
            ("1", "9", _) => 0,
            ("1", "10", _) => 1,
            ("1", "11", _) => 2,
            ("1", "12", "0" | "1" | "2") => 3,
            ("1", "12", "3" | "4" | "5" | "6" | "7") => no_upgrade,
            _ => {
                bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9 and v1.10")
                bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from versions in range [{}-{}]",
                    FIRST_SUPPORTED_UPGRADE_FROM_VERSION,
                    LAST_SUPPORTED_UPGRADE_FROM_VERSION);
            }
        };

@@ -43,20 +86,32 @@ impl OfflineUpgrade {
        let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) {
            ("1", "10", _) => 0,
            ("1", "11", _) => 1,
            ("1", "12", "0" | "1" | "2") => 2,
            ("1", "12", "3" | "4" | "5" | "6" | "7") => 3,
            (major, _, _) if major.starts_with('v') => {
                bail!("Target version must not start with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.")
            }
            _ => {
                bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10 and v1.11")
                bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to versions in range [{}-{}]",
                    FIRST_SUPPORTED_UPGRADE_TO_VERSION,
                    LAST_SUPPORTED_UPGRADE_TO_VERSION);
            }
        };
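
        // Worked example: upgrading from 1.10.x to 1.12.3 yields start_at = 1 and
        // ends_at = 3, so v1_10_to_v1_11, v1_11_to_v1_12 and v1_12_to_v1_12_3 run in order.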

        println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}");

        if start_at == no_upgrade {
            println!("No upgrade operation to perform, writing VERSION file");
            create_version_file(&self.db_path, target_major, target_minor, target_patch)
                .context("while writing VERSION file after the upgrade")?;
            println!("Success");
            return Ok(());
        }

        #[allow(clippy::needless_range_loop)]
        for index in start_at..=ends_at {
            let (func, major, minor, patch) = upgrade_list[index];
            (func)(&self.db_path)?;
            (func)(&self.db_path, current_major, current_minor, current_patch)?;
            println!("Done");
            // We're writing the version file just in case an issue arises _while_ upgrading.
            // We don't want the DB to fail in an unknown state.

@@ -1,18 +1,13 @@
use anyhow::bail;
use std::path::Path;

use anyhow::Context;
use meilisearch_types::{
    heed::{
        types::{SerdeJson, Str},
        Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified,
    },
    milli::index::{db_name, main_key},
};

use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
use anyhow::{bail, Context};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
use meilisearch_types::milli::index::{db_name, main_key};

use super::v1_9;
use crate::uuid_codec::UuidCodec;
use crate::{try_opening_database, try_opening_poly_database};

pub type FieldDistribution = std::collections::BTreeMap<String, u64>;

@@ -156,7 +151,12 @@ fn date_round_trip(
    Ok(())
}

pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> {
pub fn v1_9_to_v1_10(
    db_path: &Path,
    _origin_major: &str,
    _origin_minor: &str,
    _origin_patch: &str,
) -> anyhow::Result<()> {
    println!("Upgrading from v1.9.0 to v1.10.0");
    // 2 changes here

@@ -7,14 +7,19 @@
use std::path::Path;

use anyhow::Context;
use meilisearch_types::{
    heed::{types::Str, Database, EnvOpenOptions},
    milli::index::db_name,
};
use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{Database, EnvOpenOptions};
use meilisearch_types::milli::index::db_name;

use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
use crate::uuid_codec::UuidCodec;
use crate::{try_opening_database, try_opening_poly_database};

pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> {
pub fn v1_10_to_v1_11(
    db_path: &Path,
    _origin_major: &str,
    _origin_minor: &str,
    _origin_patch: &str,
) -> anyhow::Result<()> {
    println!("Upgrading from v1.10.0 to v1.11.0");

    let index_scheduler_path = db_path.join("tasks");

crates/meilitool/src/upgrade/v1_12.rs (new file, 298 lines)
@@ -0,0 +1,298 @@
//! The breaking changes that happened between the v1.11 and the v1.12 are:
//! - The new indexer changed the update files format from OBKV to ndjson. https://github.com/meilisearch/meilisearch/pull/4900
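//!
//! For example (illustrative documents, not taken from the repo), a converted
//! update file carries one JSON object per line instead of a binary OBKV batch:
//!
//!   {"id": 1, "name": "kefir"}
//!   {"id": 2, "name": "echo"}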

use std::borrow::Cow;
use std::io::BufWriter;
use std::path::Path;
use std::sync::atomic::AtomicBool;

use anyhow::Context;
use file_store::FileStore;
use indexmap::IndexMap;
use meilisearch_types::milli::documents::DocumentsBatchReader;
use meilisearch_types::milli::heed::types::{SerdeJson, Str};
use meilisearch_types::milli::heed::{Database, EnvOpenOptions, RoTxn, RwTxn};
use meilisearch_types::milli::progress::Step;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::Serialize;
use serde_json::value::RawValue;
use tempfile::NamedTempFile;
use time::OffsetDateTime;
use uuid::Uuid;

use crate::try_opening_database;
use crate::uuid_codec::UuidCodec;

pub fn v1_11_to_v1_12(
    db_path: &Path,
    _origin_major: &str,
    _origin_minor: &str,
    _origin_patch: &str,
) -> anyhow::Result<()> {
    println!("Upgrading from v1.11.0 to v1.12.0");

    convert_update_files(db_path)?;

    Ok(())
}

pub fn v1_12_to_v1_12_3(
    db_path: &Path,
    origin_major: &str,
    origin_minor: &str,
    origin_patch: &str,
) -> anyhow::Result<()> {
    println!("Upgrading from v1.12.{{0, 1, 2}} to v1.12.3");

    if origin_minor == "12" {
        rebuild_field_distribution(db_path)?;
    } else {
        println!("Not rebuilding field distribution as it wasn't corrupted coming from v{origin_major}.{origin_minor}.{origin_patch}");
    }

    Ok(())
}

/// Convert the update files from OBKV to ndjson format.
///
/// 1) List all the update files using the file store.
/// 2) For each update file, read the update file into a DocumentsBatchReader.
/// 3) For each document in the update file, convert the document to a JSON object.
/// 4) Write the JSON object to a tmp file in the update files directory.
/// 5) Persist the tmp file replacing the old update file.
fn convert_update_files(db_path: &Path) -> anyhow::Result<()> {
    let update_files_dir_path = db_path.join("update_files");
    let file_store = FileStore::new(&update_files_dir_path).with_context(|| {
        format!("while creating file store for update files dir {update_files_dir_path:?}")
    })?;

    for uuid in file_store.all_uuids().context("while retrieving uuids from file store")? {
        let uuid = uuid.context("while retrieving uuid from file store")?;
        let update_file_path = file_store.get_update_path(uuid);
        let update_file = file_store
            .get_update(uuid)
            .with_context(|| format!("while getting update file for uuid {uuid:?}"))?;

        let mut file =
            NamedTempFile::new_in(&update_files_dir_path).map(BufWriter::new).with_context(
                || format!("while creating bufwriter for update file {update_file_path:?}"),
            )?;

        let reader = DocumentsBatchReader::from_reader(update_file).with_context(|| {
            format!("while creating documents batch reader for update file {update_file_path:?}")
        })?;
        let (mut cursor, index) = reader.into_cursor_and_fields_index();

        while let Some(document) = cursor.next_document().with_context(|| {
            format!(
                "while reading documents from batch reader for update file {update_file_path:?}"
            )
        })? {
            let mut json_document = IndexMap::new();
            for (fid, value) in document {
                let field_name = index
                    .name(fid)
                    .with_context(|| format!("while getting field name for fid {fid} for update file {update_file_path:?}"))?;
                let value: &RawValue = serde_json::from_slice(value)?;
                json_document.insert(field_name, value);
            }

            serde_json::to_writer(&mut file, &json_document)?;
        }

        let file = file.into_inner().map_err(|e| e.into_error()).context(format!(
            "while flushing update file bufwriter for update file {update_file_path:?}"
        ))?;
        let _ = file
            // atomically replace the obkv file with the rewritten NDJSON file
            .persist(&update_file_path)
            .with_context(|| format!("while persisting update file {update_file_path:?}"))?;
    }

    Ok(())
}

/// Rebuild field distribution as it was wrongly computed in v1.12.x if x < 3
fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> {
    let index_scheduler_path = db_path.join("tasks");
    let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;

    let mut sched_wtxn = env.write_txn()?;

    let index_mapping: Database<Str, UuidCodec> =
        try_opening_database(&env, &sched_wtxn, "index-mapping")?;
    let stats_db: Database<UuidCodec, SerdeJson<IndexStats>> =
        try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| {
            format!("While trying to open {:?}", index_scheduler_path.display())
        })?;

    let index_count =
        index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?;

    // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn
    // 1. immutably for the iteration
    // 2. mutably for updating index stats
    let indexes: Vec<_> = index_mapping
        .iter(&sched_wtxn)?
        .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid)))
        .collect();

    let progress = meilisearch_types::milli::progress::Progress::default();
    let finished = AtomicBool::new(false);

    std::thread::scope(|scope| {
        let display_progress = std::thread::Builder::new()
            .name("display_progress".into())
            .spawn_scoped(scope, || {
                while !finished.load(std::sync::atomic::Ordering::Relaxed) {
                    std::thread::sleep(std::time::Duration::from_secs(5));
                    let view = progress.as_progress_view();
                    let Ok(view) = serde_json::to_string(&view) else {
                        continue;
                    };
                    println!("{view}");
                }
            })
            .unwrap();

        for (index_index, result) in indexes.into_iter().enumerate() {
            let (uid, uuid) = result?;
            progress.update_progress(VariableNameStep::new(
                &uid,
                index_index as u32,
                index_count as u32,
            ));
            let index_path = db_path.join("indexes").join(uuid.to_string());

            println!(
                "[{}/{index_count}]Updating index `{uid}` at `{}`",
                index_index + 1,
                index_path.display()
            );

            println!("\t- Rebuilding field distribution");

            let index = meilisearch_types::milli::Index::new(EnvOpenOptions::new(), &index_path)
                .with_context(|| {
                    format!("while opening index {uid} at '{}'", index_path.display())
                })?;

            let mut index_txn = index.write_txn()?;

            meilisearch_types::milli::update::new::reindex::field_distribution(
                &index,
                &mut index_txn,
                &progress,
            )
            .context("while rebuilding field distribution")?;

            let stats = IndexStats::new(&index, &index_txn)
                .with_context(|| format!("computing stats for index `{uid}`"))?;
            store_stats_of(stats_db, uuid, &mut sched_wtxn, &uid, &stats)?;

            index_txn.commit().context("while committing the write txn for the updated index")?;
        }

        sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?;

        finished.store(true, std::sync::atomic::Ordering::Relaxed);

        if let Err(panic) = display_progress.join() {
            let msg = match panic.downcast_ref::<&'static str>() {
                Some(s) => *s,
                None => match panic.downcast_ref::<String>() {
                    Some(s) => &s[..],
                    None => "Box<dyn Any>",
                },
            };
            eprintln!("WARN: the display thread panicked with {msg}");
        }

        println!("Upgrading database succeeded");
        Ok(())
    })
}

pub struct VariableNameStep {
    name: String,
    current: u32,
    total: u32,
}

impl VariableNameStep {
    pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
        Self { name: name.into(), current, total }
    }
}
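
// Usage sketch: `rebuild_field_distribution` above reports one step per index, e.g.
// progress.update_progress(VariableNameStep::new("movies", 0, index_count as u32));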

impl Step for VariableNameStep {
    fn name(&self) -> Cow<'static, str> {
        self.name.clone().into()
    }

    fn current(&self) -> u32 {
        self.current
    }

    fn total(&self) -> u32 {
        self.total
    }
}

pub fn store_stats_of(
    stats_db: Database<UuidCodec, SerdeJson<IndexStats>>,
    index_uuid: Uuid,
    sched_wtxn: &mut RwTxn,
    index_uid: &str,
    stats: &IndexStats,
) -> anyhow::Result<()> {
    stats_db
        .put(sched_wtxn, &index_uuid, stats)
        .with_context(|| format!("storing stats for index `{index_uid}`"))?;
    Ok(())
}

/// The statistics that can be computed from an `Index` object.
#[derive(Serialize, Debug)]
pub struct IndexStats {
    /// Number of documents in the index.
    pub number_of_documents: u64,
    /// Size taken up by the index' DB, in bytes.
    ///
    /// This includes the size taken by both the used and free pages of the DB, and as the free pages
    /// are not returned to the disk after a deletion, this number is typically larger than
    /// `used_database_size` that only includes the size of the used pages.
    pub database_size: u64,
    /// Size taken by the used pages of the index' DB, in bytes.
    ///
    /// As the DB backend does not return to the disk the pages that are not currently used by the DB,
    /// this value is typically smaller than `database_size`.
    pub used_database_size: u64,
    /// Association of every field name with the number of times it occurs in the documents.
    pub field_distribution: FieldDistribution,
    /// Creation date of the index.
    #[serde(with = "time::serde::rfc3339")]
    pub created_at: OffsetDateTime,
    /// Date of the last update of the index.
    #[serde(with = "time::serde::rfc3339")]
    pub updated_at: OffsetDateTime,
}

impl IndexStats {
    /// Compute the stats of an index
    ///
    /// # Parameters
    ///
    /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
    pub fn new(index: &Index, rtxn: &RoTxn) -> meilisearch_types::milli::Result<Self> {
        Ok(IndexStats {
            number_of_documents: index.number_of_documents(rtxn)?,
            database_size: index.on_disk_size()?,
            used_database_size: index.used_size()?,
            field_distribution: index.field_distribution(rtxn)?,
            created_at: index.created_at(rtxn)?,
            updated_at: index.updated_at(rtxn)?,
        })
    }
}
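
// Usage sketch (read-only stats for a single index):
// let rtxn = index.read_txn()?;
// let stats = IndexStats::new(&index, &rtxn)?;
// println!("{} documents", stats.number_of_documents);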
|
||||
@@ -42,7 +42,7 @@ obkv = "0.3.0"
|
||||
once_cell = "1.19.0"
|
||||
ordered-float = "4.2.1"
|
||||
rayon = "1.10.0"
|
||||
roaring = { version = "0.10.6", features = ["serde"] }
|
||||
roaring = { version = "0.10.7", features = ["serde"] }
|
||||
rstar = { version = "0.12.0", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
|
||||
@@ -91,13 +91,15 @@ ureq = { version = "2.10.0", features = ["json"] }
|
||||
url = "2.5.2"
|
||||
rayon-par-bridge = "0.1.0"
|
||||
hashbrown = "0.15.0"
|
||||
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
|
||||
bumpalo = "3.16.0"
|
||||
bumparaw-collections = "0.1.2"
|
||||
thread_local = "1.1.8"
|
||||
allocator-api2 = "0.2.18"
|
||||
rustc-hash = "2.0.0"
|
||||
uell = "0.1.0"
|
||||
enum-iterator = "2.1.0"
|
||||
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
|
||||
flume = { version = "0.11.1", default-features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.43", default-features = false }
|
||||
|
||||
@@ -33,7 +33,7 @@ pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) ->
|
||||
let field_name = index
|
||||
.name(field_id)
|
||||
.ok_or(FieldIdMapMissingEntry::FieldId { field_id, process: "obkv_to_object" })?;
|
||||
let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
|
||||
let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson).unwrap();
|
||||
Ok((field_name.to_string(), value))
|
||||
})
|
||||
.collect()
|
||||
@@ -84,7 +84,8 @@ impl DocumentsBatchIndex {
|
||||
let key =
|
||||
self.0.get_by_left(&k).ok_or(crate::error::InternalError::DatabaseClosing)?.clone();
|
||||
let value = serde_json::from_slice::<serde_json::Value>(v)
|
||||
.map_err(crate::error::InternalError::SerdeJson)?;
|
||||
.map_err(crate::error::InternalError::SerdeJson)
|
||||
.unwrap();
|
||||
map.insert(key, value);
|
||||
}
|
||||
|
||||
|
||||
@@ -92,7 +92,8 @@ impl<'a> PrimaryKey<'a> {
             PrimaryKey::Flat { name: _, field_id } => match document.get(*field_id) {
                 Some(document_id_bytes) => {
                     let document_id = serde_json::from_slice(document_id_bytes)
-                        .map_err(InternalError::SerdeJson)?;
+                        .map_err(InternalError::SerdeJson)
+                        .unwrap();
                     match validate_document_id_value(document_id) {
                         Ok(document_id) => Ok(Ok(document_id)),
                         Err(user_error) => {
@@ -108,7 +109,8 @@ impl<'a> PrimaryKey<'a> {
                 if let Some(field_id) = fields.id(first_level_name) {
                     if let Some(value_bytes) = document.get(field_id) {
                         let object = serde_json::from_slice(value_bytes)
-                            .map_err(InternalError::SerdeJson)?;
+                            .map_err(InternalError::SerdeJson)
+                            .unwrap();
                         fetch_matching_values(object, right, &mut matching_documents_ids);

                         if matching_documents_ids.len() >= 2 {
@@ -151,11 +153,12 @@ impl<'a> PrimaryKey<'a> {
         };

         let document_id: &RawValue =
-            serde_json::from_slice(document_id).map_err(InternalError::SerdeJson)?;
+            serde_json::from_slice(document_id).map_err(InternalError::SerdeJson).unwrap();

         let document_id = document_id
             .deserialize_any(crate::update::new::indexer::de::DocumentIdVisitor(indexer))
-            .map_err(InternalError::SerdeJson)?;
+            .map_err(InternalError::SerdeJson)
+            .unwrap();

         let external_document_id = match document_id {
             Ok(document_id) => Ok(document_id),
@@ -173,7 +176,7 @@ impl<'a> PrimaryKey<'a> {

             let Some(value) = document.get(fid) else { continue };
             let value: &RawValue =
-                serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
+                serde_json::from_slice(value).map_err(InternalError::SerdeJson).unwrap();
             match match_component(first_level, right, value, indexer, &mut docid) {
                 ControlFlow::Continue(()) => continue,
                 ControlFlow::Break(Ok(_)) => {
@@ -183,7 +186,7 @@ impl<'a> PrimaryKey<'a> {
                     .into())
                 }
                 ControlFlow::Break(Err(err)) => {
-                    return Err(InternalError::SerdeJson(err).into())
+                    panic!("{err}");
                 }
             }
         }
@@ -280,7 +283,7 @@ fn starts_with(selector: &str, key: &str) -> bool {

 pub fn validate_document_id_str(document_id: &str) -> Option<&str> {
     if document_id.is_empty()
-        || document_id.len() > 512
+        || document_id.len() >= 512
         || !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
     {
         None
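The boundary change above is subtle: with `>= 512`, a 512-byte identifier is now rejected, so the longest accepted identifier is 511 bytes, which matches the updated error message in the next hunk. A quick sketch (not part of the diff):

    assert!(validate_document_id_str(&"a".repeat(511)).is_some()); // still accepted
    assert!(validate_document_id_str(&"a".repeat(512)).is_none()); // rejected since this change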
@@ -3,6 +3,7 @@ use std::convert::Infallible;
 use std::fmt::Write;
 use std::{io, str};

+use bstr::BString;
 use heed::{Error as HeedError, MdbError};
 use rayon::ThreadPoolBuildError;
 use rhai::EvalAltResult;
@@ -62,9 +63,9 @@ pub enum InternalError {
     #[error(transparent)]
     Store(#[from] MdbError),
     #[error("Cannot delete {key:?} from database {database_name}: {error}")]
-    StoreDeletion { database_name: &'static str, key: Vec<u8>, error: heed::Error },
+    StoreDeletion { database_name: &'static str, key: BString, error: heed::Error },
     #[error("Cannot insert {key:?} and value with length {value_length} into database {database_name}: {error}")]
-    StorePut { database_name: &'static str, key: Vec<u8>, value_length: usize, error: heed::Error },
+    StorePut { database_name: &'static str, key: BString, value_length: usize, error: heed::Error },
     #[error(transparent)]
     Utf8(#[from] str::Utf8Error),
     #[error("An indexation process was explicitly aborted")]
@@ -113,7 +114,7 @@ pub enum UserError {
         "Document identifier `{}` is invalid. \
 A document identifier can be of type integer or string, \
 only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
-and can not be more than 512 bytes.", .document_id.to_string()
+and can not be more than 511 bytes.", .document_id.to_string()
     )]
     InvalidDocumentId { document_id: Value },
     #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]
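Swapping `Vec<u8>` for `BString` in these variants only changes how the key renders through `{key:?}`: `bstr`'s `Debug` implementation escapes the bytes into a readable string instead of printing a list of integers. A small sketch (not part of the diff) of the difference:

    use bstr::BString;

    let key: Vec<u8> = b"word\xffdocids".to_vec();
    println!("{key:?}"); // [119, 111, 114, 100, 255, ...]
    let key = BString::from(key);
    println!("{key:?}"); // "word\xFFdocids"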
@@ -97,7 +97,7 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {

     fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
         let mut v = vec![value.size];
-        CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
+        CboRoaringBitmapCodec::serialize_into_vec(&value.bitmap, &mut v);
         Ok(Cow::Owned(v))
     }
 }
@@ -27,18 +27,27 @@ impl CboRoaringBitmapCodec {
         }
     }

-    pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
+    pub fn serialize_into_vec(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
+        Self::serialize_into_writer(roaring, vec).unwrap()
+    }
+
+    pub fn serialize_into_writer<W: io::Write>(
+        roaring: &RoaringBitmap,
+        mut writer: W,
+    ) -> io::Result<()> {
         if roaring.len() <= THRESHOLD as u64 {
             // If the number of items (u32s) to encode is less than or equal to the threshold
             // it means that it would weigh the same or less than the RoaringBitmap
             // header, so we directly encode them using ByteOrder instead.
             for integer in roaring {
-                vec.write_u32::<NativeEndian>(integer).unwrap();
+                writer.write_u32::<NativeEndian>(integer)?;
             }
         } else {
             // Otherwise, we use the classic RoaringBitmapCodec that writes a header.
-            roaring.serialize_into(vec).unwrap();
+            roaring.serialize_into(writer)?;
         }
+
+        Ok(())
     }

     pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
@@ -143,7 +152,7 @@ impl CboRoaringBitmapCodec {
             return Ok(None);
         }

-        Self::serialize_into(&previous, buffer);
+        Self::serialize_into_vec(&previous, buffer);
         Ok(Some(&buffer[..]))
     }
 }
@@ -169,7 +178,7 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {

     fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
         let mut vec = Vec::with_capacity(Self::serialized_size(item));
-        Self::serialize_into(item, &mut vec);
+        Self::serialize_into_vec(item, &mut vec);
         Ok(Cow::Owned(vec))
     }
 }
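A minimal sketch (not part of the diff) of what the rename enables: the encoding logic now lives in `serialize_into_writer`, which accepts any `io::Write`, while `serialize_into_vec` keeps the old infallible `Vec<u8>` path. The file path below is hypothetical:

    let bitmap: RoaringBitmap = (0..10).collect();

    // The Vec path cannot fail, so the unwrap stays internal.
    let mut buffer = Vec::new();
    CboRoaringBitmapCodec::serialize_into_vec(&bitmap, &mut buffer);

    // The writer path propagates io::Error instead of panicking.
    let file = std::fs::File::create("/tmp/bitmap.cbo")?; // hypothetical path
    CboRoaringBitmapCodec::serialize_into_writer(&bitmap, file)?;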
@@ -1734,6 +1734,7 @@ pub(crate) mod tests {

     use crate::error::{Error, InternalError};
     use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
+    use crate::progress::Progress;
     use crate::update::new::indexer;
     use crate::update::settings::InnerIndexSettings;
     use crate::update::{
@@ -1810,7 +1811,7 @@ pub(crate) mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                &|_progress| (),
+                Progress::default(),
             )?;

             if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@@ -1821,6 +1822,7 @@ pub(crate) mod tests {
                 indexer::index(
                     wtxn,
                     &self.inner,
+                    &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
                     indexer_config.grenad_parameters(),
                     &db_fields_ids_map,
                     new_fields_ids_map,
@@ -1828,7 +1830,7 @@ pub(crate) mod tests {
                     &document_changes,
                     embedders,
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
             })
             .unwrap()?;
@@ -1900,7 +1902,7 @@ pub(crate) mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                &|_progress| (),
+                Progress::default(),
             )?;

             if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@@ -1911,6 +1913,7 @@ pub(crate) mod tests {
                 indexer::index(
                     wtxn,
                     &self.inner,
+                    &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
                     indexer_config.grenad_parameters(),
                     &db_fields_ids_map,
                     new_fields_ids_map,
@@ -1918,7 +1921,7 @@ pub(crate) mod tests {
                     &document_changes,
                     embedders,
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
             })
             .unwrap()?;
@@ -1980,7 +1983,7 @@ pub(crate) mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                &|_progress| (),
+                Progress::default(),
             )
             .unwrap();

@@ -1991,6 +1994,7 @@ pub(crate) mod tests {
                 indexer::index(
                     &mut wtxn,
                     &index.inner,
+                    &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
                     indexer_config.grenad_parameters(),
                     &db_fields_ids_map,
                     new_fields_ids_map,
@@ -1998,7 +2002,7 @@ pub(crate) mod tests {
                     &document_changes,
                     embedders,
                     &|| should_abort.load(Relaxed),
-                    &|_| (),
+                    &Progress::default(),
                 )
             })
             .unwrap()
@@ -1,6 +1,7 @@
 #![cfg_attr(all(test, fuzzing), feature(no_coverage))]
 #![allow(clippy::type_complexity)]

+#[cfg(not(windows))]
 #[cfg(test)]
 #[global_allocator]
 pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
@@ -30,6 +31,7 @@ pub mod vector;
 #[macro_use]
 pub mod snapshot_tests;
 mod fieldids_weights_map;
+pub mod progress;

 use std::collections::{BTreeMap, HashMap};
 use std::convert::{TryFrom, TryInto};
@@ -226,7 +228,8 @@ pub fn obkv_to_json(
                 field_id: id,
                 process: "obkv_to_json",
             })?;
-            let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?;
+            let value =
+                serde_json::from_slice(value).map_err(error::InternalError::SerdeJson).unwrap();
             Ok((name.to_owned(), value))
         })
         .collect()
crates/milli/src/progress.rs (new file, 152 lines)
@@ -0,0 +1,152 @@
use std::any::TypeId;
use std::borrow::Cow;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};

use serde::Serialize;

pub trait Step: 'static + Send + Sync {
    fn name(&self) -> Cow<'static, str>;
    fn current(&self) -> u32;
    fn total(&self) -> u32;
}

#[derive(Clone, Default)]
pub struct Progress {
    steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>,
}

impl Progress {
    pub fn update_progress<P: Step>(&self, sub_progress: P) {
        let mut steps = self.steps.write().unwrap();
        let step_type = TypeId::of::<P>();
        if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) {
            steps.truncate(idx);
        }
        steps.push((step_type, Box::new(sub_progress)));
    }

    // TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
    pub fn as_progress_view(&self) -> ProgressView {
        let steps = self.steps.read().unwrap();

        let mut percentage = 0.0;
        let mut prev_factors = 1.0;

        let mut step_view = Vec::with_capacity(steps.len());
        for (_, step) in steps.iter() {
            prev_factors *= step.total() as f32;
            percentage += step.current() as f32 / prev_factors;

            step_view.push(ProgressStepView {
                current_step: step.name(),
                finished: step.current(),
                total: step.total(),
            });
        }

        ProgressView { steps: step_view, percentage: percentage * 100.0 }
    }
}

/// This trait lets you use the AtomicSubStep defined right below.
/// The name must be a const that never changed but that can't be enforced by the type system because it make the trait non object-safe.
/// By forcing the Default trait + the &'static str we make it harder to miss-use the trait.
pub trait NamedStep: 'static + Send + Sync + Default {
    fn name(&self) -> &'static str;
}

/// Structure to quickly define steps that need very quick, lockless updating of their current step.
/// You can use this struct if:
/// - The name of the step doesn't change
/// - The total number of steps doesn't change
pub struct AtomicSubStep<Name: NamedStep> {
    unit_name: Name,
    current: Arc<AtomicU32>,
    total: u32,
}

impl<Name: NamedStep> AtomicSubStep<Name> {
    pub fn new(total: u32) -> (Arc<AtomicU32>, Self) {
        let current = Arc::new(AtomicU32::new(0));
        (current.clone(), Self { current, total, unit_name: Name::default() })
    }
}

impl<Name: NamedStep> Step for AtomicSubStep<Name> {
    fn name(&self) -> Cow<'static, str> {
        self.unit_name.name().into()
    }

    fn current(&self) -> u32 {
        self.current.load(Ordering::Relaxed)
    }

    fn total(&self) -> u32 {
        self.total
    }
}

#[macro_export]
macro_rules! make_enum_progress {
    ($visibility:vis enum $name:ident { $($variant:ident,)+ }) => {
        #[repr(u8)]
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
        #[allow(clippy::enum_variant_names)]
        $visibility enum $name {
            $($variant),+
        }

        impl Step for $name {
            fn name(&self) -> Cow<'static, str> {
                use convert_case::Casing;

                match self {
                    $(
                        $name::$variant => stringify!($variant).from_case(convert_case::Case::Camel).to_case(convert_case::Case::Lower).into()
                    ),+
                }
            }

            fn current(&self) -> u32 {
                *self as u32
            }

            fn total(&self) -> u32 {
                Self::CARDINALITY as u32
            }
        }
    };
}

#[macro_export]
macro_rules! make_atomic_progress {
    ($struct_name:ident alias $atomic_struct_name:ident => $step_name:literal) => {
        #[derive(Default, Debug, Clone, Copy)]
        pub struct $struct_name {}
        impl NamedStep for $struct_name {
            fn name(&self) -> &'static str {
                $step_name
            }
        }
        pub type $atomic_struct_name = AtomicSubStep<$struct_name>;
    };
}

make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" );

#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProgressView {
    pub steps: Vec<ProgressStepView>,
    pub percentage: f32,
}

#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProgressStepView {
    pub current_step: Cow<'static, str>,
    pub finished: u32,
    pub total: u32,
}
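To make the new module concrete, here is a minimal sketch (not part of the diff) of driving it with a hand-written `Step`; `ExampleStep` is hypothetical:

    struct ExampleStep {
        current: u32,
    }

    impl Step for ExampleStep {
        fn name(&self) -> Cow<'static, str> {
            "example step".into()
        }
        fn current(&self) -> u32 {
            self.current
        }
        fn total(&self) -> u32 {
            10
        }
    }

    let progress = Progress::default();
    for i in 0..10 {
        // Pushing a step with the same TypeId replaces it (and truncates any
        // deeper sub-steps), so the view always reflects the latest state.
        progress.update_progress(ExampleStep { current: i });
        println!("{}%", progress.as_progress_view().percentage);
    }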
@@ -3,12 +3,13 @@ use std::collections::BTreeMap;
 use std::fmt::{self, Debug};

 use bumpalo::Bump;
+use bumparaw_collections::{RawMap, RawVec, Value};
 use liquid::model::{
     ArrayView, DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, ScalarCow, State,
     Value as LiquidValue,
 };
 use liquid::{ObjectView, ValueView};
-use raw_collections::{RawMap, RawVec};
+use rustc_hash::FxBuildHasher;
 use serde_json::value::RawValue;

 use crate::update::del_add::{DelAdd, KvReaderDelAdd};
@@ -195,7 +196,7 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ObjectView for ParseableDocument<'doc
 }

 impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, D> {
-    fn as_debug(&self) -> &dyn fmt::Debug {
+    fn as_debug(&self) -> &dyn Debug {
         self
     }
     fn render(&self) -> liquid::model::DisplayCow<'_> {
@@ -243,14 +244,13 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc,
     }
 }

-#[derive(Debug)]
 struct ParseableValue<'doc> {
-    value: raw_collections::Value<'doc>,
+    value: Value<'doc, FxBuildHasher>,
 }

 impl<'doc> ParseableValue<'doc> {
     pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self {
-        let value = raw_collections::Value::from_raw_value(value, doc_alloc).unwrap();
+        let value = Value::from_raw_value_and_hasher(value, FxBuildHasher, doc_alloc).unwrap();
         Self { value }
     }

@@ -260,19 +260,19 @@ impl<'doc> ParseableValue<'doc> {
 }

 // transparent newtype for implementing ValueView
-#[repr(transparent)]
-#[derive(Debug)]
-struct ParseableMap<'doc>(RawMap<'doc>);
+#[repr(transparent)]
+struct ParseableMap<'doc>(RawMap<'doc, FxBuildHasher>);

 // transparent newtype for implementing ValueView
-#[repr(transparent)]
-#[derive(Debug)]
+#[repr(transparent)]
 struct ParseableArray<'doc>(RawVec<'doc>);

 impl<'doc> ParseableMap<'doc> {
-    pub fn as_parseable<'a>(map: &'a RawMap<'doc>) -> &'a ParseableMap<'doc> {
+    pub fn as_parseable<'a>(map: &'a RawMap<'doc, FxBuildHasher>) -> &'a ParseableMap<'doc> {
         // SAFETY: repr(transparent)
-        unsafe { &*(map as *const RawMap as *const Self) }
+        unsafe { &*(map as *const RawMap<FxBuildHasher> as *const Self) }
     }
 }

@@ -447,8 +447,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn render(&self) -> DisplayCow<'_> {
-        use raw_collections::value::Number;
-        use raw_collections::Value;
+        use bumparaw_collections::value::Number;
+        use bumparaw_collections::Value;

         match &self.value {
             Value::Null => LiquidValue::Nil.render(),
             Value::Bool(v) => v.render(),
@@ -464,8 +465,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn source(&self) -> DisplayCow<'_> {
-        use raw_collections::value::Number;
-        use raw_collections::Value;
+        use bumparaw_collections::value::Number;
+        use bumparaw_collections::Value;

         match &self.value {
             Value::Null => LiquidValue::Nil.source(),
             Value::Bool(v) => ValueView::source(v),
@@ -481,8 +483,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn type_name(&self) -> &'static str {
-        use raw_collections::value::Number;
-        use raw_collections::Value;
+        use bumparaw_collections::value::Number;
+        use bumparaw_collections::Value;

         match &self.value {
             Value::Null => LiquidValue::Nil.type_name(),
             Value::Bool(v) => v.type_name(),
@@ -498,7 +501,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn query_state(&self, state: State) -> bool {
-        use raw_collections::Value;
+        use bumparaw_collections::Value;

         match &self.value {
             Value::Null => ValueView::query_state(&LiquidValue::Nil, state),
             Value::Bool(v) => ValueView::query_state(v, state),
@@ -515,7 +519,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn to_kstr(&self) -> KStringCow<'_> {
-        use raw_collections::Value;
+        use bumparaw_collections::Value;

         match &self.value {
             Value::Null => ValueView::to_kstr(&LiquidValue::Nil),
             Value::Bool(v) => ValueView::to_kstr(v),
@@ -527,12 +532,14 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn to_value(&self) -> LiquidValue {
-        use raw_collections::Value;
+        use bumparaw_collections::value::Number;
+        use bumparaw_collections::Value;

         match &self.value {
             Value::Null => LiquidValue::Nil,
             Value::Bool(v) => LiquidValue::Scalar(liquid::model::ScalarCow::new(*v)),
             Value::Number(number) => match number {
-                raw_collections::value::Number::PosInt(number) => {
+                Number::PosInt(number) => {
                     let number: i64 = match (*number).try_into() {
                         Ok(number) => number,
                         Err(_) => {
@@ -541,12 +548,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
                     };
                     LiquidValue::Scalar(ScalarCow::new(number))
                 }
-                raw_collections::value::Number::NegInt(number) => {
-                    LiquidValue::Scalar(ScalarCow::new(*number))
-                }
-                raw_collections::value::Number::Finite(number) => {
-                    LiquidValue::Scalar(ScalarCow::new(*number))
-                }
+                Number::NegInt(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
+                Number::Finite(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
             },
             Value::String(s) => LiquidValue::Scalar(liquid::model::ScalarCow::new(s.to_string())),
             Value::Array(raw_vec) => ParseableArray::as_parseable(raw_vec).to_value(),
@@ -555,8 +558,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn as_scalar(&self) -> Option<liquid::model::ScalarCow<'_>> {
-        use raw_collections::value::Number;
-        use raw_collections::Value;
+        use bumparaw_collections::value::Number;
+        use bumparaw_collections::Value;

         match &self.value {
             Value::Bool(v) => Some(liquid::model::ScalarCow::new(*v)),
             Value::Number(number) => match number {
@@ -576,34 +580,41 @@ impl<'doc> ValueView for ParseableValue<'doc> {
     }

     fn is_scalar(&self) -> bool {
-        use raw_collections::Value;
+        use bumparaw_collections::Value;

         matches!(&self.value, Value::Bool(_) | Value::Number(_) | Value::String(_))
     }

     fn as_array(&self) -> Option<&dyn liquid::model::ArrayView> {
-        if let raw_collections::Value::Array(array) = &self.value {
+        if let Value::Array(array) = &self.value {
             return Some(ParseableArray::as_parseable(array) as _);
         }
         None
     }

     fn is_array(&self) -> bool {
-        matches!(&self.value, raw_collections::Value::Array(_))
+        matches!(&self.value, bumparaw_collections::Value::Array(_))
     }

     fn as_object(&self) -> Option<&dyn ObjectView> {
-        if let raw_collections::Value::Object(object) = &self.value {
+        if let Value::Object(object) = &self.value {
             return Some(ParseableMap::as_parseable(object) as _);
         }
         None
     }

     fn is_object(&self) -> bool {
-        matches!(&self.value, raw_collections::Value::Object(_))
+        matches!(&self.value, bumparaw_collections::Value::Object(_))
     }

     fn is_nil(&self) -> bool {
-        matches!(&self.value, raw_collections::Value::Null)
+        matches!(&self.value, bumparaw_collections::Value::Null)
     }
 }
+
+impl Debug for ParseableValue<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("ParseableValue").field("value", &self.value).finish()
+    }
+}
@@ -38,6 +38,16 @@ pub struct RenderPromptError {
     pub fault: FaultSource,
 }
 impl RenderPromptError {
+    pub(crate) fn missing_context_with_external_docid(
+        external_docid: String,
+        inner: liquid::Error,
+    ) -> RenderPromptError {
+        Self {
+            kind: RenderPromptErrorKind::MissingContextWithExternalDocid(external_docid, inner),
+            fault: FaultSource::User,
+        }
+    }
+
     pub(crate) fn missing_context(inner: liquid::Error) -> RenderPromptError {
         Self { kind: RenderPromptErrorKind::MissingContext(inner), fault: FaultSource::User }
     }
@@ -47,6 +57,8 @@ impl RenderPromptError {
 pub enum RenderPromptErrorKind {
     #[error("missing field in document: {0}")]
     MissingContext(liquid::Error),
+    #[error("missing field in document `{0}`: {1}")]
+    MissingContextWithExternalDocid(String, liquid::Error),
 }

 impl From<RenderPromptError> for crate::Error {
@@ -119,6 +119,7 @@ impl Prompt {
         'doc: 'a, // lifetime of the allocator, will live for an entire chunk of documents
     >(
         &self,
+        external_docid: &str,
         document: impl crate::update::new::document::Document<'a> + Debug,
         field_id_map: &RefCell<GlobalFieldsIdsMap>,
         doc_alloc: &'doc Bump,
@@ -130,9 +131,12 @@ impl Prompt {
             self.max_bytes.unwrap_or_else(default_max_bytes).get(),
             doc_alloc,
         );
-        self.template
-            .render_to(&mut rendered, &context)
-            .map_err(RenderPromptError::missing_context)?;
+        self.template.render_to(&mut rendered, &context).map_err(|liquid_error| {
+            RenderPromptError::missing_context_with_external_docid(
+                external_docid.to_owned(),
+                liquid_error,
+            )
+        })?;
         Ok(std::str::from_utf8(rendered.into_bump_slice())
             .expect("render can only write UTF-8 because all inputs and processing preserve utf-8"))
     }
@@ -219,12 +219,19 @@ impl<'a> FacetDistribution<'a> {
                 let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();

                 let key: (FieldId, _, &str) = (field_id, any_docid, facet_key);
-                let original_string = self
-                    .index
-                    .field_id_docid_facet_strings
-                    .get(self.rtxn, &key)?
-                    .unwrap()
-                    .to_owned();
+                let optional_original_string =
+                    self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?;
+
+                let original_string = match optional_original_string {
+                    Some(original_string) => original_string.to_owned(),
+                    None => {
+                        tracing::error!(
+                            "Missing original facet string. Using the normalized facet {} instead",
+                            facet_key
+                        );
+                        facet_key.to_string()
+                    }
+                };

                 distribution.insert(original_string, nbr_docids);
                 if distribution.len() == self.max_values_per_facet {
@@ -5,6 +5,7 @@ use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use maplit::{btreemap, hashset};

+use crate::progress::Progress;
 use crate::update::new::indexer;
 use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use crate::vector::EmbeddingConfigs;
@@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();

@@ -83,6 +84,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
     indexer::index(
         &mut wtxn,
         &index,
+        &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
         config.grenad_parameters(),
         &db_fields_ids_map,
         new_fields_ids_map,
@@ -90,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
         &document_changes,
         embedders,
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();

@@ -1,4 +1,4 @@
-use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::Arc;

 use rayon::{ThreadPool, ThreadPoolBuilder};
@@ -9,6 +9,8 @@ use thiserror::Error;
 #[derive(Debug)]
 pub struct ThreadPoolNoAbort {
     thread_pool: ThreadPool,
+    /// The number of active operations.
+    active_operations: AtomicUsize,
     /// Set to true if the thread pool catched a panic.
     pool_catched_panic: Arc<AtomicBool>,
 }
@@ -19,7 +21,9 @@ impl ThreadPoolNoAbort {
         OP: FnOnce() -> R + Send,
         R: Send,
     {
+        self.active_operations.fetch_add(1, Ordering::Relaxed);
         let output = self.thread_pool.install(op);
+        self.active_operations.fetch_sub(1, Ordering::Relaxed);
         // While reseting the pool panic catcher we return an error if we catched one.
         if self.pool_catched_panic.swap(false, Ordering::SeqCst) {
             Err(PanicCatched)
@@ -31,6 +35,11 @@ impl ThreadPoolNoAbort {
     pub fn current_num_threads(&self) -> usize {
         self.thread_pool.current_num_threads()
     }
+
+    /// The number of active operations.
+    pub fn active_operations(&self) -> usize {
+        self.active_operations.load(Ordering::Relaxed)
+    }
 }

 #[derive(Error, Debug)]
@@ -64,6 +73,10 @@ impl ThreadPoolNoAbortBuilder {
             let catched_panic = pool_catched_panic.clone();
             move |_result| catched_panic.store(true, Ordering::SeqCst)
         });
-        Ok(ThreadPoolNoAbort { thread_pool: self.0.build()?, pool_catched_panic })
+        Ok(ThreadPoolNoAbort {
+            thread_pool: self.0.build()?,
+            active_operations: AtomicUsize::new(0),
+            pool_catched_panic,
+        })
     }
 }
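A short sketch (not part of the diff) of what the new counter is for: callers can now ask a pool whether work is in flight, for example to pick an idle pool. The builder call mirrors the one used in the tests above:

    let pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
    let result = pool.install(|| 40 + 2);
    assert_eq!(result.unwrap(), 42);
    // fetch_add/fetch_sub bracket install(), so the counter is back to 0 here.
    assert_eq!(pool.active_operations(), 0);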
@@ -79,22 +79,29 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
 use std::collections::BTreeSet;
 use std::fs::File;
 use std::io::BufReader;
+use std::ops::Bound;

 use grenad::Merger;
+use heed::types::{Bytes, DecodeIgnore};
+use heed::BytesDecode as _;
+use roaring::RoaringBitmap;
 use time::OffsetDateTime;
 use tracing::debug;

 use self::incremental::FacetsUpdateIncremental;
 use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
 use crate::facet::FacetType;
-use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
+use crate::heed_codec::facet::{
+    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
+};
 use crate::heed_codec::BytesRefCodec;
 use crate::search::facet::get_highest_level;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd};
 use crate::{try_split_array_at, FieldId, Index, Result};

 pub mod bulk;
 pub mod incremental;
+pub mod new_incremental;

 /// A builder used to add new elements to the `facet_id_string_docids` or `facet_id_f64_docids` databases.
 ///
@@ -646,3 +653,194 @@ mod comparison_bench {
         }
     }
 }
+
+/// Run sanity checks on the specified fid tree
+///
+/// 1. No "orphan" child value, any child value has a parent
+/// 2. Any docid in the child appears in the parent
+/// 3. No docid in the parent is missing from all its children
+/// 4. no group is bigger than max_group_size
+/// 5. Less than 50% of groups are bigger than group_size
+/// 6. group size matches the number of children
+/// 7. max_level is < 255
+pub(crate) fn sanity_checks(
+    index: &Index,
+    rtxn: &heed::RoTxn,
+    field_id: FieldId,
+    facet_type: FacetType,
+    group_size: usize,
+    _min_level_size: usize, // might add a check on level size later
+    max_group_size: usize,
+) -> Result<()> {
+    tracing::info!(%field_id, ?facet_type, "performing sanity checks");
+    let database = match facet_type {
+        FacetType::String => {
+            index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
+        }
+        FacetType::Number => {
+            index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
+        }
+    };
+
+    let leaf_prefix: FacetGroupKey<&[u8]> = FacetGroupKey { field_id, level: 0, left_bound: &[] };
+
+    let leaf_it = database.prefix_iter(rtxn, &leaf_prefix)?;
+
+    let max_level = get_highest_level(rtxn, database, field_id)?;
+    if max_level == u8::MAX {
+        panic!("max_level == 255");
+    }
+
+    for leaf in leaf_it {
+        let (leaf_facet_value, leaf_docids) = leaf?;
+        let mut current_level = 0;
+
+        let mut current_parent_facet_value: Option<FacetGroupKey<&[u8]>> = None;
+        let mut current_parent_docids: Option<crate::heed_codec::facet::FacetGroupValue> = None;
+        loop {
+            current_level += 1;
+            if current_level >= max_level {
+                break;
+            }
+            let parent_key_right_bound = FacetGroupKey {
+                field_id,
+                level: current_level,
+                left_bound: leaf_facet_value.left_bound,
+            };
+            let (parent_facet_value, parent_docids) = database
+                .get_lower_than_or_equal_to(rtxn, &parent_key_right_bound)?
+                .expect("no parent found");
+            if parent_facet_value.level != current_level {
+                panic!(
+                    "wrong parent level, found_level={}, expected_level={}",
+                    parent_facet_value.level, current_level
+                );
+            }
+            if parent_facet_value.field_id != field_id {
+                panic!("wrong parent fid");
+            }
+            if parent_facet_value.left_bound > leaf_facet_value.left_bound {
+                panic!("wrong parent left bound");
+            }
+
+            if !leaf_docids.bitmap.is_subset(&parent_docids.bitmap) {
+                panic!(
+                    "missing docids from leaf in parent, current_level={}, parent={}, child={}, missing={missing:?}, child_len={}, child={:?}",
+                    current_level,
+                    facet_to_string(parent_facet_value.left_bound, facet_type),
+                    facet_to_string(leaf_facet_value.left_bound, facet_type),
+                    leaf_docids.bitmap.len(),
+                    leaf_docids.bitmap.clone(),
+                    missing=leaf_docids.bitmap - parent_docids.bitmap,
+                )
+            }
+
+            if let Some(current_parent_facet_value) = current_parent_facet_value {
+                if current_parent_facet_value.field_id != parent_facet_value.field_id {
+                    panic!("wrong parent parent fid");
+                }
+                if current_parent_facet_value.level + 1 != parent_facet_value.level {
+                    panic!("wrong parent parent level");
+                }
+                if current_parent_facet_value.left_bound < parent_facet_value.left_bound {
+                    panic!("wrong parent parent left bound");
+                }
+            }
+
+            if let Some(current_parent_docids) = current_parent_docids {
+                if !current_parent_docids.bitmap.is_subset(&parent_docids.bitmap) {
+                    panic!("missing docids from intermediate node in parent, parent_level={}, parent={}, intermediate={}, missing={missing:?}, intermediate={:?}",
+                        parent_facet_value.level,
+                        facet_to_string(parent_facet_value.left_bound, facet_type),
+                        facet_to_string(current_parent_facet_value.unwrap().left_bound, facet_type),
+                        current_parent_docids.bitmap.clone(),
+                        missing=current_parent_docids.bitmap - parent_docids.bitmap,
+                    );
+                }
+            }
+
+            current_parent_facet_value = Some(parent_facet_value);
+            current_parent_docids = Some(parent_docids);
+        }
+    }
+    tracing::info!(%field_id, ?facet_type, "checked all leaves");
+
+    let mut current_level = max_level;
+    let mut greater_than_group = 0usize;
+    let mut total = 0usize;
+    loop {
+        if current_level == 0 {
+            break;
+        }
+        let child_level = current_level - 1;
+        tracing::info!(%field_id, ?facet_type, %current_level, "checked groups for level");
+        let level_groups_prefix: FacetGroupKey<&[u8]> =
+            FacetGroupKey { field_id, level: current_level, left_bound: &[] };
+        let mut level_groups_it = database.prefix_iter(rtxn, &level_groups_prefix)?.peekable();
+
+        'group_it: loop {
+            let Some(group) = level_groups_it.next() else { break 'group_it };
+
+            let (group_facet_value, group_docids) = group?;
+            let child_left_bound = group_facet_value.left_bound.to_owned();
+            let mut expected_docids = RoaringBitmap::new();
+            let mut expected_size = 0usize;
+            let right_bound = level_groups_it
+                .peek()
+                .and_then(|res| res.as_ref().ok())
+                .map(|(key, _)| key.left_bound);
+            let child_left_bound = FacetGroupKey {
+                field_id,
+                level: child_level,
+                left_bound: child_left_bound.as_slice(),
+            };
+            let child_left_bound = Bound::Included(&child_left_bound);
+            let child_right_bound;
+            let child_right_bound = if let Some(right_bound) = right_bound {
+                child_right_bound =
+                    FacetGroupKey { field_id, level: child_level, left_bound: right_bound };
+                Bound::Excluded(&child_right_bound)
+            } else {
+                Bound::Unbounded
+            };
+            let children = database.range(rtxn, &(child_left_bound, child_right_bound))?;
+            for child in children {
+                let (child_facet_value, child_docids) = child?;
+                if child_facet_value.field_id != field_id {
+                    break;
+                }
+                if child_facet_value.level != child_level {
+                    break;
+                }
+                expected_size += 1;
+                expected_docids |= &child_docids.bitmap;
+            }
+            assert_eq!(expected_size, group_docids.size as usize);
+            assert!(expected_size <= max_group_size);
+            assert_eq!(expected_docids, group_docids.bitmap);
+            total += 1;
+            if expected_size > group_size {
+                greater_than_group += 1;
+            }
+        }
+
+        current_level -= 1;
+    }
+    if greater_than_group * 2 > total {
+        panic!("too many groups have a size > group_size");
+    }
+
+    tracing::info!("sanity checks OK");
+
+    Ok(())
+}
+
+fn facet_to_string(facet_value: &[u8], facet_type: FacetType) -> String {
+    match facet_type {
+        FacetType::String => bstr::BStr::new(facet_value).to_string(),
+        FacetType::Number => match OrderedF64Codec::bytes_decode(facet_value) {
+            Ok(value) => value.to_string(),
+            Err(e) => format!("error: {e} (bytes: {facet_value:?}"),
+        },
+    }
+}
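A sketch (not part of the diff) of how this checker might be invoked from a test; the field id is hypothetical and the size parameters are assumed to come from the `FACET_GROUP_SIZE`, `FACET_MIN_LEVEL_SIZE`, and `FACET_MAX_GROUP_SIZE` constants named around this hunk:

    let rtxn = index.read_txn()?;
    sanity_checks(
        &index,
        &rtxn,
        0, // hypothetical field id
        FacetType::Number,
        FACET_GROUP_SIZE as usize,
        FACET_MIN_LEVEL_SIZE as usize,
        FACET_MAX_GROUP_SIZE as usize,
    )?;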
crates/milli/src/update/facet/new_incremental.rs (new file, 498 lines)
@@ -0,0 +1,498 @@
use std::ops::Bound;

use heed::types::{Bytes, DecodeIgnore};
use heed::{BytesDecode as _, Database, RwTxn};
use roaring::RoaringBitmap;

use crate::facet::FacetType;
use crate::heed_codec::facet::{
    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::search::facet::get_highest_level;
use crate::update::valid_facet_value;
use crate::{FieldId, Index, Result};

pub struct FacetsUpdateIncremental {
    inner: FacetsUpdateIncrementalInner,
    delta_data: Vec<FacetFieldIdChange>,
}

struct FacetsUpdateIncrementalInner {
    db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
    field_id: FieldId,
    group_size: u8,
    min_level_size: u8,
    max_group_size: u8,
}

impl FacetsUpdateIncremental {
    pub fn new(
        index: &Index,
        facet_type: FacetType,
        field_id: FieldId,
        delta_data: Vec<FacetFieldIdChange>,
        group_size: u8,
        min_level_size: u8,
        max_group_size: u8,
    ) -> Self {
        FacetsUpdateIncremental {
            inner: FacetsUpdateIncrementalInner {
                db: match facet_type {
                    FacetType::String => index
                        .facet_id_string_docids
                        .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
                    FacetType::Number => index
                        .facet_id_f64_docids
                        .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
                },
                field_id,
                group_size,
                min_level_size,
                max_group_size,
            },

            delta_data,
        }
    }

    #[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::incremental")]
    pub fn execute(mut self, wtxn: &mut RwTxn) -> Result<()> {
        if self.delta_data.is_empty() {
            return Ok(());
        }
        self.delta_data.sort_unstable_by(
            |FacetFieldIdChange { facet_value: left, .. },
             FacetFieldIdChange { facet_value: right, .. }| {
                left.cmp(right)
                    // sort in **reverse** lexicographic order
                    .reverse()
            },
        );

        self.inner.find_changed_parents(wtxn, self.delta_data)?;

        self.inner.add_or_delete_level(wtxn)
    }
}

impl FacetsUpdateIncrementalInner {
    /// WARNING: `changed_children` must be sorted in **reverse** lexicographic order.
    fn find_changed_parents(
        &self,
        wtxn: &mut RwTxn,
        mut changed_children: Vec<FacetFieldIdChange>,
    ) -> Result<()> {
        let mut changed_parents = vec![];
        for child_level in 0u8..u8::MAX {
            // child_level < u8::MAX by construction
            let parent_level = child_level + 1;
            let parent_level_left_bound: FacetGroupKey<&[u8]> =
                FacetGroupKey { field_id: self.field_id, level: parent_level, left_bound: &[] };

            let mut last_parent: Option<Box<[u8]>> = None;
            let mut child_it = changed_children
                // drain all changed children
                .drain(..)
                // keep only children whose value is valid in the LMDB sense
                .filter(|child| valid_facet_value(&child.facet_value));
            // `while let` rather than `for` because we advance `child_it` inside of the loop
            'current_level: while let Some(child) = child_it.next() {
                if let Some(last_parent) = &last_parent {
                    if &child.facet_value >= last_parent {
                        self.compute_parent_group(wtxn, child_level, child.facet_value)?;
                        continue 'current_level;
                    }
                }

                // need to find a new parent
                let parent_key_prefix = FacetGroupKey {
                    field_id: self.field_id,
                    level: parent_level,
                    left_bound: &*child.facet_value,
                };

                let parent = self
                    .db
                    .remap_data_type::<DecodeIgnore>()
                    .rev_range(
                        wtxn,
                        &(
                            Bound::Excluded(&parent_level_left_bound),
                            Bound::Included(&parent_key_prefix),
                        ),
                    )?
                    .next();

                match parent {
                    Some(Ok((parent_key, _parent_value))) => {
                        // found parent, cache it for next keys
                        last_parent = Some(parent_key.left_bound.to_owned().into_boxed_slice());

                        // add to modified list for parent level
                        changed_parents.push(FacetFieldIdChange {
                            facet_value: parent_key.left_bound.to_owned().into_boxed_slice(),
                        });
                        self.compute_parent_group(wtxn, child_level, child.facet_value)?;
                    }
                    Some(Err(err)) => return Err(err.into()),
                    None => {
                        // no parent for that key
                        let mut parent_it = self
                            .db
                            .remap_data_type::<DecodeIgnore>()
                            .prefix_iter_mut(wtxn, &parent_level_left_bound)?;
                        match parent_it.next() {
                            // 1. left of the current left bound, or
                            Some(Ok((first_key, _first_value))) => {
                                // make sure we don't spill on the neighboring fid (level also included defensively)
                                if first_key.field_id != self.field_id
                                    || first_key.level != parent_level
                                {
                                    // max level reached, exit
                                    drop(parent_it);
                                    self.compute_parent_group(
                                        wtxn,
                                        child_level,
                                        child.facet_value,
                                    )?;
                                    for child in child_it.by_ref() {
                                        self.compute_parent_group(
                                            wtxn,
                                            child_level,
                                            child.facet_value,
                                        )?;
                                    }
                                    return Ok(());
                                }
                                // remove old left bound
                                unsafe { parent_it.del_current()? };
                                drop(parent_it);
                                changed_parents.push(FacetFieldIdChange {
                                    facet_value: child.facet_value.clone(),
                                });
                                self.compute_parent_group(wtxn, child_level, child.facet_value)?;
                                // pop all elements in order to visit the new left bound
                                let new_left_bound =
                                    &mut changed_parents.last_mut().unwrap().facet_value;
                                for child in child_it.by_ref() {
                                    new_left_bound.clone_from(&child.facet_value);

                                    self.compute_parent_group(
                                        wtxn,
                                        child_level,
                                        child.facet_value,
                                    )?;
                                }
                            }
                            Some(Err(err)) => return Err(err.into()),
                            // 2. max level reached, exit
                            None => {
                                drop(parent_it);
                                self.compute_parent_group(wtxn, child_level, child.facet_value)?;
                                for child in child_it.by_ref() {
                                    self.compute_parent_group(
                                        wtxn,
                                        child_level,
                                        child.facet_value,
                                    )?;
                                }
                                return Ok(());
                            }
                        }
                    }
                }
            }
            if changed_parents.is_empty() {
                return Ok(());
            }
            drop(child_it);
            std::mem::swap(&mut changed_children, &mut changed_parents);
            // changed_parents is now empty because changed_children was emptied by the drain
        }
        Ok(())
    }

    fn compute_parent_group(
        &self,
        wtxn: &mut RwTxn<'_>,
        parent_level: u8,
        parent_left_bound: Box<[u8]>,
    ) -> Result<()> {
        let mut range_left_bound: Vec<u8> = parent_left_bound.into();
        if parent_level == 0 {
            return Ok(());
        }
        let child_level = parent_level - 1;

        let parent_key = FacetGroupKey {
            field_id: self.field_id,
            level: parent_level,
            left_bound: &*range_left_bound,
        };
        let child_right_bound = self
            .db
            .remap_data_type::<DecodeIgnore>()
            .get_greater_than(wtxn, &parent_key)?
            .and_then(
                |(
                    FacetGroupKey {
                        level: right_level,
                        field_id: right_fid,
                        left_bound: right_bound,
                    },
                    _,
                )| {
                    if parent_level != right_level || self.field_id != right_fid {
                        // there was a greater key, but with a greater level or fid, so not a sibling to the parent: ignore
                        return None;
                    }
                    Some(right_bound.to_owned())
                },
            );
        let child_right_bound = match &child_right_bound {
            Some(right_bound) => Bound::Excluded(FacetGroupKey {
                left_bound: right_bound.as_slice(),
                field_id: self.field_id,
                level: child_level,
            }),
            None => Bound::Unbounded,
        };

        let child_left_key = FacetGroupKey {
            field_id: self.field_id,
            level: child_level,
            left_bound: &*range_left_bound,
        };
        let mut child_left_bound = Bound::Included(child_left_key);

        loop {
            // do a first pass on the range to find the number of children
            let child_count = self
                .db
                .remap_data_type::<DecodeIgnore>()
                .range(wtxn, &(child_left_bound, child_right_bound))?
                .take(self.max_group_size as usize * 2)
                .count();
            let mut child_it = self.db.range(wtxn, &(child_left_bound, child_right_bound))?;

            // pick the right group_size depending on the number of children
            let group_size = if child_count >= self.max_group_size as usize * 2 {
                // more than twice the max_group_size => there will be space for at least 2 groups of max_group_size
                self.max_group_size as usize
            } else if child_count >= self.group_size as usize {
                // size in [group_size, max_group_size * 2[
                // divided by 2 it is between [group_size / 2, max_group_size[
                // this ensures that the tree is balanced
                child_count / 2
            } else {
                // take everything
                child_count
            };

            let res: Result<_> = child_it
                .by_ref()
                .take(group_size)
                // stop if we go to the next level or field id
                .take_while(|res| match res {
                    Ok((child_key, _)) => {
                        child_key.field_id == self.field_id && child_key.level == child_level
                    }
                    Err(_) => true,
                })
                .try_fold(
                    (None, FacetGroupValue { size: 0, bitmap: Default::default() }),
                    |(bounds, mut group_value), child_res| {
                        let (child_key, child_value) = child_res?;
                        let bounds = match bounds {
                            Some((left_bound, _)) => Some((left_bound, child_key.left_bound)),
                            None => Some((child_key.left_bound, child_key.left_bound)),
                        };
                        // max_group_size <= u8::MAX
                        group_value.size += 1;
                        group_value.bitmap |= &child_value.bitmap;
                        Ok((bounds, group_value))
                    },
                );

            let (bounds, group_value) = res?;

            let Some((group_left_bound, right_bound)) = bounds else {
                let update_key = FacetGroupKey {
                    field_id: self.field_id,
                    level: parent_level,
                    left_bound: &*range_left_bound,
                };
                drop(child_it);
                if let Bound::Included(_) = child_left_bound {
                    self.db.delete(wtxn, &update_key)?;
                }

                break;
            };

            drop(child_it);
            let current_left_bound = group_left_bound.to_owned();

            let delete_old_bound = match child_left_bound {
                Bound::Included(bound) => {
                    if bound.left_bound != current_left_bound {
                        Some(range_left_bound.clone())
                    } else {
                        None
                    }
                }
                _ => None,
            };

            range_left_bound.clear();
            range_left_bound.extend_from_slice(right_bound);
            let child_left_key = FacetGroupKey {
                field_id: self.field_id,
                level: child_level,
                left_bound: range_left_bound.as_slice(),
            };
            child_left_bound = Bound::Excluded(child_left_key);

            if let Some(old_bound) = delete_old_bound {
                let update_key = FacetGroupKey {
                    field_id: self.field_id,
                    level: parent_level,
                    left_bound: old_bound.as_slice(),
                };
                self.db.delete(wtxn, &update_key)?;
            }

            let update_key = FacetGroupKey {
                field_id: self.field_id,
                level: parent_level,
                left_bound: current_left_bound.as_slice(),
            };
            if group_value.bitmap.is_empty() {
                self.db.delete(wtxn, &update_key)?;
            } else {
                self.db.put(wtxn, &update_key, &group_value)?;
            }
        }

        Ok(())
    }

    /// Check whether the highest level has exceeded `min_level_size` * `self.group_size`.
    /// If it has, we must build an addition level above it.
    /// Then check whether the highest level is under `min_level_size`.
    /// If it has, we must remove the complete level.
    pub(crate) fn add_or_delete_level(&self, txn: &mut RwTxn<'_>) -> Result<()> {
        let highest_level = get_highest_level(txn, self.db, self.field_id)?;
        let mut highest_level_prefix = vec![];
        highest_level_prefix.extend_from_slice(&self.field_id.to_be_bytes());
        highest_level_prefix.push(highest_level);

        let size_highest_level =
            self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count();

        if size_highest_level >= self.group_size as usize * self.min_level_size as usize {
            self.add_level(txn, highest_level, &highest_level_prefix, size_highest_level)
        } else if size_highest_level < self.min_level_size as usize && highest_level != 0 {
            self.delete_level(txn, &highest_level_prefix)
        } else {
            Ok(())
        }
    }

    /// Delete a level.
    fn delete_level(&self, txn: &mut RwTxn<'_>, highest_level_prefix: &[u8]) -> Result<()> {
        let mut to_delete = vec![];
        let mut iter =
            self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, highest_level_prefix)?;
        for el in iter.by_ref() {
            let (k, _) = el?;
            to_delete.push(
                FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
                    .map_err(heed::Error::Encoding)?
                    .into_owned(),
            );
        }
        drop(iter);
        for k in to_delete {
            self.db.delete(txn, &k.as_ref())?;
        }
        Ok(())
    }

    /// Build an additional level for the field id.
    fn add_level(
        &self,
        txn: &mut RwTxn<'_>,
        highest_level: u8,
        highest_level_prefix: &[u8],
        size_highest_level: usize,
    ) -> Result<()> {
        let mut groups_iter = self
            .db
            .remap_types::<Bytes, FacetGroupValueCodec>()
            .prefix_iter(txn, highest_level_prefix)?;

        let nbr_new_groups = size_highest_level / self.group_size as usize;
        let nbr_leftover_elements = size_highest_level % self.group_size as usize;

        let mut to_add = vec![];
        for _ in 0..nbr_new_groups {
            let mut first_key = None;
            let mut values = RoaringBitmap::new();
            for _ in 0..self.group_size {
                let (key_bytes, value_i) = groups_iter.next().unwrap()?;
                let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
                    .map_err(heed::Error::Encoding)?;

                if first_key.is_none() {
                    first_key = Some(key_i);
                }
                values |= value_i.bitmap;
            }
            let key = FacetGroupKey {
                field_id: self.field_id,
                level: highest_level + 1,
                left_bound: first_key.unwrap().left_bound,
            };
            let value = FacetGroupValue { size: self.group_size, bitmap: values };
            to_add.push((key.into_owned(), value));
        }
        // now we add the rest of the level, in case its size is > group_size * min_level_size
        // this can indeed happen if the min_level_size parameter changes between two calls to `insert`
        if nbr_leftover_elements > 0 {
            let mut first_key = None;
            let mut values = RoaringBitmap::new();
            for _ in 0..nbr_leftover_elements {
                let (key_bytes, value_i) = groups_iter.next().unwrap()?;
                let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
                    .map_err(heed::Error::Encoding)?;

                if first_key.is_none() {
                    first_key = Some(key_i);
                }
                values |= value_i.bitmap;
            }
            let key = FacetGroupKey {
                field_id: self.field_id,
                level: highest_level + 1,
                left_bound: first_key.unwrap().left_bound,
            };
            // Note: nbr_leftover_elements can be casted to a u8 since it is bounded by `max_group_size`
            // when it is created above.
            let value = FacetGroupValue { size: nbr_leftover_elements as u8, bitmap: values };
            to_add.push((key.into_owned(), value));
        }

        drop(groups_iter);
        for (key, value) in to_add {
            self.db.put(txn, &key.as_ref(), &value)?;
        }
        Ok(())
    }
}

#[derive(Debug)]
pub struct FacetFieldIdChange {
    pub facet_value: Box<[u8]>,
}
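To close the new file, a sketch (not part of the diff) of how the updater is meant to be driven: gather the changed leaf facet values for one field id, then execute inside a write transaction. All concrete values below are hypothetical; the trailing parameters follow the `group_size`, `min_level_size`, `max_group_size` order of `new` above:

    let changes = vec![
        FacetFieldIdChange { facet_value: Box::from(&b"blue"[..]) },
        FacetFieldIdChange { facet_value: Box::from(&b"red"[..]) },
    ];
    let mut wtxn = index.write_txn()?;
    FacetsUpdateIncremental::new(&index, FacetType::String, 0, changes, 4, 5, 8)
        .execute(&mut wtxn)?;
    wtxn.commit()?;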
@@ -123,7 +123,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
             }
         }

-        let document_id = serde_json::to_vec(&document_id).map_err(InternalError::SerdeJson)?;
+        let document_id =
+            serde_json::to_vec(&document_id).map_err(InternalError::SerdeJson).unwrap();
         external_ids.insert(count.to_be_bytes(), document_id)?;

         count += 1;
@@ -237,7 +238,7 @@ pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result<StdResult
     let debug_id = || {
        serde_json::from_slice(id.value().as_bytes()).unwrap_or_else(|_| Value::from(id.debug()))
     };
-    match serde_json::from_slice(bytes).map_err(InternalError::SerdeJson)? {
+    match serde_json::from_slice(bytes).map_err(InternalError::SerdeJson).unwrap() {
         Value::Object(mut object) => match (object.remove("lat"), object.remove("lng")) {
             (Some(lat), Some(lng)) => {
                 match (extract_finite_float_from_value(lat), extract_finite_float_from_value(lng)) {
Some files were not shown because too many files have changed in this diff.