Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-11-23 13:16:33 +00:00)

Compare commits: CaroFG-pat ... default-ex (124 commits)
| SHA1 |
|---|
| 0ab633df76 |
| 3d013cdebe |
| ddeff5678f |
| a235434910 |
| a376525348 |
| ea70a7d1c9 |
| 9304f8e586 |
| 495db080ec |
| d71341fa48 |
| 5b3070d8c3 |
| 89006fd4b3 |
| 49f50a0a21 |
| 1104f00803 |
| 33fa564a9c |
| a097b254f8 |
| 54cb0ec437 |
| 38ed1f1dbb |
| 643dd33358 |
| 32f9fb6ab2 |
| b5966f82e8 |
| 5e54063aab |
| 40456795d0 |
| 40e60c6f52 |
| eeae6383d0 |
| 8cbcaeff56 |
| ce87d5a89e |
| 9f7172f6ab |
| d6eca83cfa |
| a9d6e86077 |
| 346f9efe3a |
| a987d698c1 |
| fc3508c8c8 |
| dbb45dec1a |
| 5f69a43846 |
| fe1e4814fa |
| c29749741b |
| 3e47201365 |
| ec9719f3b1 |
| b2cc9e4db8 |
| 56198bae48 |
| 888059b2d0 |
| 410f2fc8c3 |
| 54e244d2f3 |
| e0c36972fb |
| daadcddb5e |
| 7f92dafa02 |
| cc5d12a368 |
| 0f98b996b5 |
| d005ca5bf7 |
| 7e65fb1d3e |
| cdefb3f665 |
| a91887221a |
| 9c66b20a97 |
| a48283527e |
| 73f78c19b0 |
| 34639e346e |
| 7af2a254d6 |
| 0f9d262a1c |
| 747476a225 |
| 34765b556b |
| dfb4860578 |
| ce62713f02 |
| 8b5d04d60f |
| 1b74709b91 |
| a5c0a282c5 |
| 4fc048ff20 |
| 375b5600cd |
| 32b997d817 |
| ff3090e3cc |
| 6c6645f945 |
| af6473d999 |
| 11851f9701 |
| cc4654eabd |
| 0bb91f4a77 |
| f9d57f54df |
| 3ef1afc0f1 |
| dbb5abebb6 |
| 700f33bd39 |
| d01bbbccde |
| 4fc506f267 |
| dc456276e5 |
| b2ea50cb10 |
| 5074cf92ab |
| a92bc8d192 |
| ee538cf045 |
| 2b05d63a0f |
| 104e8918ce |
| d6ec4d4f4a |
| f0e7326b7a |
| c8106a0006 |
| c9ab5bc0b6 |
| 5e0f15fd43 |
| 4c30f090c7 |
| 63f247cdda |
| e109fa9529 |
| 76e4ec2168 |
| 982babdb74 |
| 7ae2ae33d9 |
| cb0788ae07 |
| cb3e5dc234 |
| 59d40a2821 |
| 98a678e73d |
| 70292aae3c |
| 73521f0069 |
| 4533179604 |
| 1a21cc1a17 |
| d08042f8a7 |
| 77aadb5f22 |
| 4fd913f7eb |
| 4b72e54ca7 |
| adef2cc132 |
| 533b9951b1 |
| 9103cbc9db |
| 083de2bfc1 |
| 8618a4d2ba |
| 08bc982748 |
| e9c5df7993 |
| 8a28b3aa77 |
| 1a0b100ad9 |
| ff93563f41 |
| 2f25258191 |
| 2859079c32 |
| 74b83d305f |
| 70f6e4b828 |
Cargo.lock (generated): 381 lines changed

@@ -310,6 +310,7 @@ dependencies = [
 "const-random",
 "getrandom 0.3.3",
 "once_cell",
+"serde",
 "version_check",
 "zerocopy",
 ]
@@ -344,12 +345,6 @@ version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
 
-[[package]]
-name = "allocator-api2"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c583acf993cf4245c4acb0a2cc2ab1f9cc097de73411bb6d3647ff6af2b1013d"
-
 [[package]]
 name = "anes"
 version = "0.1.6"
@@ -492,7 +487,7 @@ dependencies = [
 "backoff",
 "base64 0.22.1",
 "bytes",
-"derive_builder 0.20.2",
+"derive_builder",
 "eventsource-stream",
 "futures",
 "rand 0.8.5",
@@ -589,7 +584,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
 
 [[package]]
 name = "benchmarks"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "anyhow",
 "bumpalo",
@@ -799,7 +794,7 @@ dependencies = [
 
 [[package]]
 name = "build-info"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "anyhow",
 "time",
@@ -812,7 +807,7 @@ version = "3.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
 dependencies = [
-"allocator-api2 0.2.21",
+"allocator-api2",
 "serde",
 ]
 
@@ -822,7 +817,7 @@ version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4ce682bdc86c2e25ef5cd95881d9d6a1902214eddf74cf9ffea88fe1464377e8"
 dependencies = [
-"allocator-api2 0.2.21",
+"allocator-api2",
 "bitpacking",
 "bumpalo",
 "hashbrown 0.15.5",
@@ -945,7 +940,7 @@ dependencies = [
 "rand 0.9.2",
 "rand_distr",
 "rayon",
-"safetensors",
+"safetensors 0.4.5",
 "thiserror 1.0.69",
 "ug",
 "ug-cuda",
@@ -972,7 +967,7 @@ dependencies = [
 "half",
 "num-traits",
 "rayon",
-"safetensors",
+"safetensors 0.4.5",
 "serde",
 "thiserror 1.0.69",
 ]
@@ -1052,6 +1047,15 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
+[[package]]
+name = "castaway"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a"
+dependencies = [
+"rustversion",
+]
+
 [[package]]
 name = "cc"
 version = "1.2.37"
@@ -1128,9 +1132,9 @@ dependencies = [
 
 [[package]]
 name = "charabia"
-version = "0.9.7"
+version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c2f456825b7f15eac01a1cae40c12c3f55e931d4327e6e4fa59508d664e9568"
+checksum = "bbdc8cd8f999e8b8b13ed71d30962bbf98cf39e2f2a9f1ae1ba354199239d66e"
 dependencies = [
 "aho-corasick",
 "csv",
@@ -1139,7 +1143,6 @@ dependencies = [
 "irg-kvariants",
 "jieba-rs",
 "lindera",
-"once_cell",
 "pinyin",
 "serde",
 "slice-group-by",
@@ -1215,7 +1218,7 @@ dependencies = [
 "anstream",
 "anstyle",
 "clap_lex",
-"strsim 0.11.1",
+"strsim",
 ]
 
 [[package]]
@@ -1254,6 +1257,21 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
 
+[[package]]
+name = "compact_str"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
+dependencies = [
+"castaway",
+"cfg-if",
+"itoa",
+"rustversion",
+"ryu",
+"serde",
+"static_assertions",
+]
+
 [[package]]
 name = "concat-arrays"
 version = "0.1.2"
@@ -1512,38 +1530,14 @@ dependencies = [
 "libloading",
 ]
 
-[[package]]
-name = "darling"
-version = "0.14.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850"
-dependencies = [
-"darling_core 0.14.4",
-"darling_macro 0.14.4",
-]
-
 [[package]]
 name = "darling"
 version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
 dependencies = [
-"darling_core 0.20.11",
-"darling_macro 0.20.11",
-]
-
-[[package]]
-name = "darling_core"
-version = "0.14.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0"
-dependencies = [
-"fnv",
-"ident_case",
-"proc-macro2",
-"quote",
-"strsim 0.10.0",
-"syn 1.0.109",
+"darling_core",
+"darling_macro",
 ]
 
 [[package]]
@@ -1556,28 +1550,17 @@ dependencies = [
 "ident_case",
 "proc-macro2",
 "quote",
-"strsim 0.11.1",
+"strsim",
 "syn 2.0.106",
 ]
 
-[[package]]
-name = "darling_macro"
-version = "0.14.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e"
-dependencies = [
-"darling_core 0.14.4",
-"quote",
-"syn 1.0.109",
-]
-
 [[package]]
 name = "darling_macro"
 version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
 dependencies = [
-"darling_core 0.20.11",
+"darling_core",
 "quote",
 "syn 2.0.106",
 ]
@@ -1587,6 +1570,9 @@ name = "dary_heap"
 version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728"
+dependencies = [
+"serde",
+]
 
 [[package]]
 name = "deadpool"
@@ -1642,34 +1628,13 @@ dependencies = [
 "syn 2.0.106",
 ]
 
-[[package]]
-name = "derive_builder"
-version = "0.12.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8"
-dependencies = [
-"derive_builder_macro 0.12.0",
-]
-
 [[package]]
 name = "derive_builder"
 version = "0.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
 dependencies = [
-"derive_builder_macro 0.20.2",
-]
-
-[[package]]
-name = "derive_builder_core"
-version = "0.12.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f"
-dependencies = [
-"darling 0.14.4",
-"proc-macro2",
-"quote",
-"syn 1.0.109",
+"derive_builder_macro",
 ]
 
 [[package]]
@@ -1678,29 +1643,19 @@ version = "0.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
 dependencies = [
-"darling 0.20.11",
+"darling",
 "proc-macro2",
 "quote",
 "syn 2.0.106",
 ]
 
-[[package]]
-name = "derive_builder_macro"
-version = "0.12.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e"
-dependencies = [
-"derive_builder_core 0.12.0",
-"syn 1.0.109",
-]
-
 [[package]]
 name = "derive_builder_macro"
 version = "0.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
 dependencies = [
-"derive_builder_core 0.20.2",
+"derive_builder_core",
 "syn 2.0.106",
 ]
 
@@ -1739,7 +1694,7 @@ dependencies = [
 "serde-cs",
 "serde_json",
 "serde_urlencoded",
-"strsim 0.11.1",
+"strsim",
 ]
 
 [[package]]
@@ -1824,12 +1779,12 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9"
 dependencies = [
-"phf",
+"phf 0.11.3",
 ]
 
 [[package]]
 name = "dump"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "anyhow",
 "big_s",
@@ -2072,7 +2027,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
 [[package]]
 name = "file-store"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "tempfile",
 "thiserror 2.0.16",
@@ -2094,7 +2049,7 @@ dependencies = [
 
 [[package]]
 name = "filter-parser"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "insta",
 "levenshtein_automata",
@@ -2122,7 +2077,7 @@ dependencies = [
 
 [[package]]
 name = "flatten-serde-json"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "criterion",
 "serde_json",
@@ -2279,7 +2234,7 @@ dependencies = [
 
 [[package]]
 name = "fuzzers"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "arbitrary",
 "bumpalo",
@@ -2805,7 +2760,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
 dependencies = [
 "ahash 0.8.12",
-"allocator-api2 0.2.21",
+"allocator-api2",
 ]
 
 [[package]]
@@ -2814,7 +2769,7 @@ version = "0.15.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
 dependencies = [
-"allocator-api2 0.2.21",
+"allocator-api2",
 "equivalent",
 "foldhash",
 "serde",
@@ -2838,9 +2793,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
 [[package]]
 name = "heed"
-version = "0.22.1-nested-rtxns"
+version = "0.22.1-nested-rtxns-6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ff115ba5712b1f1fc7617b195f5c2f139e29c397ff79da040cd19db75ccc240"
+checksum = "c69e07cd539834bedcfa938f3d7d8520cce1ad2b0776c122b5ccdf8fd5bafe12"
 dependencies = [
 "bitflags 2.9.4",
 "byteorder",
@@ -3233,7 +3188,7 @@ dependencies = [
 
 [[package]]
 name = "index-scheduler"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "anyhow",
 "backoff",
@@ -3242,10 +3197,11 @@ dependencies = [
 "bumpalo",
 "bumparaw-collections",
 "byte-unit",
+"bytes",
 "convert_case 0.8.0",
 "crossbeam-channel",
 "csv",
-"derive_builder 0.20.2",
+"derive_builder",
 "dump",
 "enum-iterator",
 "file-store",
@@ -3259,13 +3215,17 @@ dependencies = [
 "memmap2",
 "page_size",
 "rayon",
+"reqwest",
 "roaring 0.10.12",
+"rusty-s3",
 "serde",
 "serde_json",
 "synchronoise",
+"tar",
 "tempfile",
 "thiserror 2.0.16",
 "time",
+"tokio",
 "tracing",
 "ureq",
 "uuid",
@@ -3408,15 +3368,6 @@ dependencies = [
 "either",
 ]
 
-[[package]]
-name = "itertools"
-version = "0.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
-dependencies = [
-"either",
-]
-
 [[package]]
 name = "itertools"
 version = "0.13.0"
@@ -3443,26 +3394,49 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
 
 [[package]]
 name = "jieba-macros"
-version = "0.7.1"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c676b32a471d3cfae8dac2ad2f8334cd52e53377733cca8c1fb0a5062fec192"
+checksum = "348294e44ee7e3c42685da656490f8febc7359632544019621588902216da95c"
 dependencies = [
 "phf_codegen",
 ]
 
 [[package]]
 name = "jieba-rs"
-version = "0.7.4"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f5dd552bbb95d578520ee68403bf8aaf0dbbb2ce55b0854d019f9350ad61040a"
+checksum = "766bd7012aa5ba49411ebdf4e93bddd59b182d2918e085d58dec5bb9b54b7105"
 dependencies = [
 "cedarwood",
-"fxhash",
 "include-flate",
 "jieba-macros",
-"lazy_static",
-"phf",
+"phf 0.13.1",
 "regex",
+"rustc-hash 2.1.1",
+]
+
+[[package]]
+name = "jiff"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
+dependencies = [
+"jiff-static",
+"log",
+"portable-atomic",
+"portable-atomic-util",
+"serde",
+]
+
+[[package]]
+name = "jiff-static"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
+dependencies = [
+"proc-macro2",
+"quote",
+"syn 2.0.106",
 ]
 
 [[package]]
@@ -3487,7 +3461,7 @@ dependencies = [
 
 [[package]]
 name = "json-depth-checker"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "criterion",
 "serde_json",
@@ -3738,7 +3712,7 @@ dependencies = [
 "bincode 2.0.1",
 "byteorder",
 "csv",
-"derive_builder 0.20.2",
+"derive_builder",
 "encoding",
 "encoding_rs",
 "encoding_rs_io",
@@ -3888,9 +3862,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
 
 [[package]]
 name = "lmdb-master-sys"
-version = "0.2.6-nested-rtxns"
+version = "0.2.6-nested-rtxns-6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4ff85130e3c994b36877045fbbb138d521dea7197bfc19dc3d5d95101a8e20a"
+checksum = "e113d9bf240f974fbe7fd516cbfd8c422e925c0655495501c7237548425493d0"
 dependencies = [
 "cc",
 "doxygen-rs",
@@ -3988,6 +3962,16 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
 
+[[package]]
+name = "md-5"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
+dependencies = [
+"cfg-if",
+"digest",
+]
+
 [[package]]
 name = "md5"
 version = "0.7.0"
@@ -3996,7 +3980,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 
 [[package]]
 name = "meili-snap"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "insta",
 "md5",
@@ -4007,7 +3991,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "actix-cors",
 "actix-http",
@@ -4104,7 +4088,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-auth"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "base64 0.22.1",
 "enum-iterator",
@@ -4123,7 +4107,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-types"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "actix-web",
 "anyhow",
@@ -4158,7 +4142,7 @@ dependencies = [
 
 [[package]]
 name = "meilitool"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "anyhow",
 "clap",
@@ -4192,9 +4176,8 @@ dependencies = [
 
 [[package]]
 name = "milli"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
-"allocator-api2 0.3.1",
 "arroy",
 "bbqueue",
 "big_s",
@@ -4252,6 +4235,7 @@ dependencies = [
 "roaring 0.10.12",
 "rstar",
 "rustc-hash 2.1.1",
+"safetensors 0.6.2",
 "serde",
 "serde_json",
 "slice-group-by",
@@ -4773,7 +4757,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
 
 [[package]]
 name = "permissive-json-pointer"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "big_s",
 "serde_json",
@@ -4830,17 +4814,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
 dependencies = [
 "phf_macros",
-"phf_shared",
+"phf_shared 0.11.3",
+]
+
+[[package]]
+name = "phf"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
+dependencies = [
+"phf_shared 0.13.1",
+"serde",
 ]
 
 [[package]]
 name = "phf_codegen"
-version = "0.11.3"
+version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
+checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1"
 dependencies = [
-"phf_generator",
-"phf_shared",
+"phf_generator 0.13.1",
+"phf_shared 0.13.1",
 ]
 
 [[package]]
@@ -4849,18 +4843,28 @@ version = "0.11.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
 dependencies = [
-"phf_shared",
+"phf_shared 0.11.3",
 "rand 0.8.5",
 ]
 
+[[package]]
+name = "phf_generator"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
+dependencies = [
+"fastrand",
+"phf_shared 0.13.1",
+]
+
 [[package]]
 name = "phf_macros"
 version = "0.11.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
 dependencies = [
-"phf_generator",
-"phf_shared",
+"phf_generator 0.11.3",
+"phf_shared 0.11.3",
 "proc-macro2",
 "quote",
 "syn 2.0.106",
@@ -4875,6 +4879,15 @@ dependencies = [
 "siphasher",
 ]
 
+[[package]]
+name = "phf_shared"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
+dependencies = [
+"siphasher",
+]
+
 [[package]]
 name = "pin-project"
 version = "1.1.10"
@@ -4962,6 +4975,15 @@ version = "1.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
 
+[[package]]
+name = "portable-atomic-util"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
+dependencies = [
+"portable-atomic",
+]
+
 [[package]]
 name = "potential_utf"
 version = "0.1.3"
@@ -5139,6 +5161,16 @@ dependencies = [
 "version_check",
 ]
 
+[[package]]
+name = "quick-xml"
+version = "0.38.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89"
+dependencies = [
+"memchr",
+"serde",
+]
+
 [[package]]
 name = "quinn"
 version = "0.11.9"
@@ -5314,12 +5346,12 @@ dependencies = [
 
 [[package]]
 name = "rayon-cond"
-version = "0.3.0"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
+checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f"
 dependencies = [
 "either",
-"itertools 0.11.0",
+"itertools 0.14.0",
 "rayon",
 ]
 
@@ -5414,6 +5446,7 @@ dependencies = [
 "futures-channel",
 "futures-core",
 "futures-util",
+"h2 0.4.12",
 "http 1.3.1",
 "http-body",
 "http-body-util",
@@ -5704,6 +5737,25 @@ version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
 
+[[package]]
+name = "rusty-s3"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fac2edd2f0b56bd79a7343f49afc01c2d41010df480538a510e0abc56044f66c"
+dependencies = [
+"base64 0.22.1",
+"hmac",
+"jiff",
+"md-5",
+"percent-encoding",
+"quick-xml",
+"serde",
+"serde_json",
+"sha2",
+"url",
+"zeroize",
+]
+
 [[package]]
 name = "ryu"
 version = "1.0.20"
@@ -5720,6 +5772,16 @@ dependencies = [
 "serde_json",
 ]
 
+[[package]]
+name = "safetensors"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "172dd94c5a87b5c79f945c863da53b2ebc7ccef4eca24ac63cca66a41aab2178"
+dependencies = [
+"serde",
+"serde_json",
+]
+
 [[package]]
 name = "same-file"
 version = "1.0.6"
@@ -6201,12 +6263,6 @@ dependencies = [
 "indexmap",
 ]
 
-[[package]]
-name = "strsim"
-version = "0.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
-
 [[package]]
 name = "strsim"
 version = "0.11.1"
@@ -6532,21 +6588,24 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokenizers"
-version = "0.15.2"
-source = "git+https://github.com/huggingface/tokenizers.git?tag=v0.15.2#701a73b869602b5639589d197e805349cdba3223"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6475a27088c98ea96d00b39a9ddfb63780d1ad4cceb6f48374349a96ab2b7842"
 dependencies = [
+"ahash 0.8.12",
 "aho-corasick",
-"derive_builder 0.12.0",
+"compact_str",
+"dary_heap",
+"derive_builder",
 "esaxx-rs",
-"getrandom 0.2.16",
-"itertools 0.12.1",
-"lazy_static",
+"getrandom 0.3.3",
+"itertools 0.14.0",
 "log",
 "macro_rules_attribute",
 "monostate",
 "onig",
 "paste",
-"rand 0.8.5",
+"rand 0.9.2",
 "rayon",
 "rayon-cond",
 "regex",
@@ -6554,7 +6613,7 @@ dependencies = [
 "serde",
 "serde_json",
 "spm_precompiled",
-"thiserror 1.0.69",
+"thiserror 2.0.16",
 "unicode-normalization-alignments",
 "unicode-segmentation",
 "unicode_categories",
@@ -6916,7 +6975,7 @@ dependencies = [
 "num-traits",
 "num_cpus",
 "rayon",
-"safetensors",
+"safetensors 0.4.5",
 "serde",
 "thiserror 1.0.69",
 "tracing",
@@ -7146,7 +7205,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6b2bf58be11fc9414104c6d3a2e464163db5ef74b12296bda593cac37b6e4777"
 dependencies = [
 "anyhow",
-"derive_builder 0.20.2",
+"derive_builder",
 "rustversion",
 "vergen-lib",
 ]
@@ -7158,7 +7217,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4f6ee511ec45098eabade8a0750e76eec671e7fb2d9360c563911336bea9cac1"
 dependencies = [
 "anyhow",
-"derive_builder 0.20.2",
+"derive_builder",
 "git2",
 "rustversion",
 "time",
@@ -7173,7 +7232,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b07e6010c0f3e59fcb164e0163834597da68d1f864e2b8ca49f74de01e9c166"
 dependencies = [
 "anyhow",
-"derive_builder 0.20.2",
+"derive_builder",
 "rustversion",
 ]
 
@@ -7820,7 +7879,7 @@ dependencies = [
 
 [[package]]
 name = "xtask"
-version = "1.24.0"
+version = "1.26.0"
 dependencies = [
 "anyhow",
 "build-info",
@@ -23,7 +23,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.24.0"
+version = "1.26.0"
 authors = [
 "Quentin de Quelen <quentin@dequelen.me>",
 "Clément Renault <clement@meilisearch.com>",
@@ -39,6 +39,7 @@
 ## 🖥 Examples
 
 - [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
+- [**Flickr**](https://flickr.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — Search and explore one hundred million Flickr images with semantic search.
 - [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
 - [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million of songs.
 - [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
@@ -96,6 +96,8 @@ pub struct TaskDump {
 pub finished_at: Option<OffsetDateTime>,
 #[serde(default, skip_serializing_if = "Option::is_none")]
 pub network: Option<TaskNetwork>,
+#[serde(default, skip_serializing_if = "Option::is_none")]
+pub custom_metadata: Option<String>,
 }
 
 // A `Kind` specific version made for the dump. If modified you may break the dump.
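The two serde attributes on the new field are what keep the dump format backward compatible: `default` lets dumps written before this change still deserialize, and `skip_serializing_if = "Option::is_none"` keeps the key out of the JSON when no metadata was set. A minimal standalone sketch of that behaviour (illustrative only, not Meilisearch code):

```rust
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug)]
struct Example {
    // Same attributes as the new TaskDump field above.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    custom_metadata: Option<String>,
}

fn main() -> Result<(), serde_json::Error> {
    // `None` serializes to an empty object: the key is omitted entirely.
    assert_eq!(serde_json::to_string(&Example { custom_metadata: None })?, "{}");
    // A document without the key still deserializes, thanks to `default`.
    let old: Example = serde_json::from_str("{}")?;
    assert!(old.custom_metadata.is_none());
    Ok(())
}
```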
@@ -178,6 +180,7 @@ impl From<Task> for TaskDump {
 started_at: task.started_at,
 finished_at: task.finished_at,
 network: task.network,
+custom_metadata: task.custom_metadata,
 }
 }
 }
@@ -396,6 +399,7 @@ pub(crate) mod test {
 started_at: Some(datetime!(2022-11-20 0:00 UTC)),
 finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
 network: None,
+custom_metadata: None,
 },
 None,
 ),
@@ -421,6 +425,7 @@ pub(crate) mod test {
 started_at: None,
 finished_at: None,
 network: None,
+custom_metadata: None,
 },
 Some(vec![
 json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
@@ -441,6 +446,7 @@ pub(crate) mod test {
 started_at: None,
 finished_at: None,
 network: None,
+custom_metadata: None,
 },
 None,
 ),
@@ -164,6 +164,7 @@ impl CompatV5ToV6 {
 started_at: task_view.started_at,
 finished_at: task_view.finished_at,
 network: None,
+custom_metadata: None,
 };
 
 (task, content_file)
@@ -60,7 +60,7 @@ impl FileStore {
 
 /// Returns the file corresponding to the requested uuid.
 pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> {
-let path = self.get_update_path(uuid);
+let path = self.update_path(uuid);
 let file = match StdFile::open(path) {
 Ok(file) => file,
 Err(e) => {
@@ -72,7 +72,7 @@ impl FileStore {
 }
 
 /// Returns the path that correspond to this uuid, the path could not exists.
-pub fn get_update_path(&self, uuid: Uuid) -> PathBuf {
+pub fn update_path(&self, uuid: Uuid) -> PathBuf {
 self.path.join(uuid.to_string())
 }
 
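The rename from `get_update_path` to `update_path` follows the common Rust API convention of dropping the `get_` prefix from getters; callers only need a mechanical update. A hypothetical call site, assuming a `FileStore` value and a task `Uuid` that are not part of this diff:

```rust
use std::path::PathBuf;
use uuid::Uuid;

// Hypothetical helper inside the file-store crate; `FileStore` is the type patched above.
fn update_file_location(store: &FileStore, uuid: Uuid) -> PathBuf {
    // Before this change this read `store.get_update_path(uuid)`.
    store.update_path(uuid)
}
```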
@@ -14,6 +14,7 @@ license.workspace = true
 anyhow = "1.0.98"
 bincode = "1.3.3"
 byte-unit = "5.1.6"
+bytes = "1.10.1"
 bumpalo = "3.18.1"
 bumparaw-collections = "0.1.4"
 convert_case = "0.8.0"
@@ -32,6 +33,7 @@ rayon = "1.10.0"
 roaring = { version = "0.10.12", features = ["serde"] }
 serde = { version = "1.0.219", features = ["derive"] }
 serde_json = { version = "1.0.140", features = ["preserve_order"] }
+tar = "0.4.44"
 synchronoise = "1.0.1"
 tempfile = "3.20.0"
 thiserror = "2.0.12"
@@ -45,6 +47,9 @@ tracing = "0.1.41"
 ureq = "2.12.1"
 uuid = { version = "1.17.0", features = ["serde", "v4"] }
 backoff = "0.4.0"
+reqwest = { version = "0.12.23", features = ["rustls-tls", "http2"], default-features = false }
+rusty-s3 = "0.8.1"
+tokio = { version = "1.47.1", features = ["full"] }
 
 [dev-dependencies]
 big_s = "1.0.2"
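The new dependencies hint at how the S3 snapshot path is wired: `rusty-s3` only builds and signs S3 requests, while `reqwest` (running on the `tokio` runtime) actually sends them. A minimal sketch of presigning an upload URL with rusty-s3 0.8, using a hypothetical endpoint, bucket name, and credentials rather than the scheduler's actual code:

```rust
use std::time::Duration;
use rusty_s3::{Bucket, Credentials, S3Action, UrlStyle};

fn main() -> Result<(), rusty_s3::BucketError> {
    // Hypothetical values, for illustration only.
    let endpoint = "https://s3.example.com".parse().expect("valid endpoint URL");
    let bucket = Bucket::new(endpoint, UrlStyle::Path, "snapshots", "us-east-1")?;
    let credentials = Credentials::new("ACCESS_KEY", "SECRET_KEY");

    // Build a presigned PUT for an object; the URL can then be sent with any HTTP client.
    let action = bucket.put_object(Some(&credentials), "snapshot.tar");
    let url = action.sign(Duration::from_secs(3600));
    println!("{url}");
    Ok(())
}
```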
@@ -150,6 +150,7 @@ impl<'a> Dump<'a> {
 details: task.details,
 status: task.status,
 network: task.network,
+custom_metadata: task.custom_metadata,
 kind: match task.kind {
 KindDump::DocumentImport {
 primary_key,
@@ -5,6 +5,7 @@ use meilisearch_types::error::{Code, ErrorCode};
 use meilisearch_types::milli::index::RollbackOutcome;
 use meilisearch_types::tasks::{Kind, Status};
 use meilisearch_types::{heed, milli};
+use reqwest::StatusCode;
 use thiserror::Error;
 
 use crate::TaskId;
@@ -127,6 +128,14 @@ pub enum Error {
 #[error("Aborted task")]
 AbortedTask,
 
+#[error("S3 error: status: {status}, body: {body}")]
+S3Error { status: StatusCode, body: String },
+#[error("S3 HTTP error: {0}")]
+S3HttpError(reqwest::Error),
+#[error("S3 XML error: {0}")]
+S3XmlError(Box<dyn std::error::Error + Send + Sync>),
+#[error("S3 bucket error: {0}")]
+S3BucketError(rusty_s3::BucketError),
 #[error(transparent)]
 Dump(#[from] dump::Error),
 #[error(transparent)]
@@ -226,6 +235,10 @@ impl Error {
 | Error::TaskCancelationWithEmptyQuery
 | Error::FromRemoteWhenExporting { .. }
 | Error::AbortedTask
+| Error::S3Error { .. }
+| Error::S3HttpError(_)
+| Error::S3XmlError(_)
+| Error::S3BucketError(_)
 | Error::Dump(_)
 | Error::Heed(_)
 | Error::Milli { .. }
@@ -293,8 +306,14 @@ impl ErrorCode for Error {
 Error::BatchNotFound(_) => Code::BatchNotFound,
 Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
 Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
-// TODO: not sure of the Code to use
 Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
+Error::S3Error { status, .. } if status.is_client_error() => {
+Code::InvalidS3SnapshotRequest
+}
+Error::S3Error { .. } => Code::S3SnapshotServerError,
+Error::S3HttpError(_) => Code::S3SnapshotServerError,
+Error::S3XmlError(_) => Code::S3SnapshotServerError,
+Error::S3BucketError(_) => Code::InvalidS3SnapshotParameters,
 Error::Dump(e) => e.error_code(),
 Error::Milli { error, .. } => error.error_code(),
 Error::ProcessBatchPanicked(_) => Code::Internal,
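The new variants separate transport failures (`S3HttpError`) from requests that reached S3 but were rejected (`S3Error`, which carries the HTTP status and the response body so the error-code mapping above can distinguish client errors from server errors). One plausible way such a variant gets built from a `reqwest` response, shown only as an illustration of the status/body split and not as the actual call site:

```rust
// Illustrative only: split a reqwest response into "ok" vs. (status, body) error data,
// the same two pieces of information the new `S3Error` variant stores.
async fn check_s3_response(
    response: reqwest::Response,
) -> Result<reqwest::Response, (reqwest::StatusCode, String)> {
    let status = response.status();
    if status.is_success() {
        Ok(response)
    } else {
        // Keep the body: S3 returns an XML payload describing the error.
        let body = response.text().await.unwrap_or_default();
        Err((status, body))
    }
}
```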
@@ -36,6 +36,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
 run_loop_iteration: _,
 embedders: _,
 chat_settings: _,
+runtime: _,
 } = scheduler;
 
 let rtxn = env.read_txn().unwrap();
@@ -231,6 +232,7 @@ pub fn snapshot_task(task: &Task) -> String {
 status,
 kind,
 network,
+custom_metadata,
 } = task;
 snap.push('{');
 snap.push_str(&format!("uid: {uid}, "));
@@ -251,6 +253,9 @@ pub fn snapshot_task(task: &Task) -> String {
 if let Some(network) = network {
 snap.push_str(&format!("network: {network:?}, "))
 }
+if let Some(custom_metadata) = custom_metadata {
+snap.push_str(&format!("custom_metadata: {custom_metadata:?}"))
+}
 
 snap.push('}');
 snap
@@ -216,6 +216,9 @@ pub struct IndexScheduler {
 /// A counter that is incremented before every call to [`tick`](IndexScheduler::tick)
 #[cfg(test)]
 run_loop_iteration: Arc<RwLock<usize>>,
+
+/// The tokio runtime used for asynchronous tasks.
+runtime: Option<tokio::runtime::Handle>,
 }
 
 impl IndexScheduler {
@@ -242,6 +245,7 @@ impl IndexScheduler {
 run_loop_iteration: self.run_loop_iteration.clone(),
 features: self.features.clone(),
 chat_settings: self.chat_settings,
+runtime: self.runtime.clone(),
 }
 }
 
@@ -255,13 +259,23 @@ impl IndexScheduler {
 }
 
 /// Create an index scheduler and start its run loop.
-#[allow(private_interfaces)] // because test_utils is private
 pub fn new(
 options: IndexSchedulerOptions,
 auth_env: Env<WithoutTls>,
 from_db_version: (u32, u32, u32),
-#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
-#[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
+runtime: Option<tokio::runtime::Handle>,
+) -> Result<Self> {
+let this = Self::new_without_run(options, auth_env, from_db_version, runtime)?;
+
+this.run();
+Ok(this)
+}
+
+fn new_without_run(
+options: IndexSchedulerOptions,
+auth_env: Env<WithoutTls>,
+from_db_version: (u32, u32, u32),
+runtime: Option<tokio::runtime::Handle>,
 ) -> Result<Self> {
 std::fs::create_dir_all(&options.tasks_path)?;
 std::fs::create_dir_all(&options.update_file_path)?;
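`IndexScheduler::new` now threads an optional `tokio::runtime::Handle` through to `new_without_run`, so the scheduler can hand async work (the reqwest-based S3 uploads) to an existing runtime when one is available. How the caller obtains that handle is not shown in this diff; a typical way to produce the expected `Option<Handle>` is sketched below:

```rust
use tokio::runtime::Handle;

// Illustrative helper: `Some(handle)` when called from inside a tokio runtime, `None` otherwise.
fn ambient_runtime() -> Option<Handle> {
    Handle::try_current().ok()
}
```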
@@ -316,8 +330,7 @@ impl IndexScheduler {
 
 wtxn.commit()?;
 
-// allow unreachable_code to get rids of the warning in the case of a test build.
-let this = Self {
+Ok(Self {
 processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
 version,
 queue,
@@ -333,15 +346,32 @@ impl IndexScheduler {
 webhooks: Arc::new(webhooks),
 embedders: Default::default(),
 
-#[cfg(test)]
-test_breakpoint_sdr,
-#[cfg(test)]
-planned_failures,
+#[cfg(test)] // Will be replaced in `new_tests` in test environments
+test_breakpoint_sdr: crossbeam_channel::bounded(0).0,
+#[cfg(test)] // Will be replaced in `new_tests` in test environments
+planned_failures: Default::default(),
 #[cfg(test)]
 run_loop_iteration: Arc::new(RwLock::new(0)),
 features,
 chat_settings,
-};
+runtime,
+})
+}
+
+/// Create an index scheduler and start its run loop.
+#[cfg(test)]
+fn new_test(
+options: IndexSchedulerOptions,
+auth_env: Env<WithoutTls>,
+from_db_version: (u32, u32, u32),
+runtime: Option<tokio::runtime::Handle>,
+test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
+planned_failures: Vec<(usize, test_utils::FailureLocation)>,
+) -> Result<Self> {
+let mut this = Self::new_without_run(options, auth_env, from_db_version, runtime)?;
+
+this.test_breakpoint_sdr = test_breakpoint_sdr;
+this.planned_failures = planned_failures;
 
 this.run();
 Ok(this)
@@ -726,6 +756,19 @@ impl IndexScheduler {
|
|||||||
kind: KindWithContent,
|
kind: KindWithContent,
|
||||||
task_id: Option<TaskId>,
|
task_id: Option<TaskId>,
|
||||||
dry_run: bool,
|
dry_run: bool,
|
||||||
|
) -> Result<Task> {
|
||||||
|
self.register_with_custom_metadata(kind, task_id, None, dry_run)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Register a new task in the scheduler, with metadata.
|
||||||
|
///
|
||||||
|
/// If it fails and data was associated with the task, it tries to delete the associated data.
|
||||||
|
pub fn register_with_custom_metadata(
|
||||||
|
&self,
|
||||||
|
kind: KindWithContent,
|
||||||
|
task_id: Option<TaskId>,
|
||||||
|
custom_metadata: Option<String>,
|
||||||
|
dry_run: bool,
|
||||||
) -> Result<Task> {
|
) -> Result<Task> {
|
||||||
// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
|
// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
|
||||||
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
|
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
|
||||||
@@ -736,7 +779,7 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut wtxn = self.env.write_txn()?;
|
let mut wtxn = self.env.write_txn()?;
|
||||||
let task = self.queue.register(&mut wtxn, &kind, task_id, dry_run)?;
|
let task = self.queue.register(&mut wtxn, &kind, task_id, custom_metadata, dry_run)?;
|
||||||
|
|
||||||
// If the registered task is a task cancelation
|
// If the registered task is a task cancelation
|
||||||
// we inform the processing tasks to stop (if necessary).
|
// we inform the processing tasks to stop (if necessary).
|
||||||
|
|||||||
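The old `register` signature is kept as a thin wrapper so existing call sites keep compiling, while the new `register_with_custom_metadata` threads the extra field through to the queue. A minimal caller-side sketch, assuming an `index_scheduler: IndexScheduler` and a `kind: KindWithContent` already in scope inside a function returning `Result`; the metadata string below is made up purely for illustration:

    // Enqueue a task and attach an opaque, caller-defined string to it.
    // `None` for the task id lets the queue allocate the next uid,
    // and `false` disables dry-run mode so the task is actually persisted.
    let task = index_scheduler.register_with_custom_metadata(
        kind,
        None,
        Some("import-batch-42".to_string()),
        false,
    )?;
    // The string is stored verbatim on the task and echoed back in task views.
    assert_eq!(task.custom_metadata.as_deref(), Some("import-batch-42"));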
@@ -257,6 +257,7 @@ impl Queue {
         wtxn: &mut RwTxn,
         kind: &KindWithContent,
         task_id: Option<TaskId>,
+        custom_metadata: Option<String>,
         dry_run: bool,
     ) -> Result<Task> {
         let next_task_id = self.tasks.next_task_id(wtxn)?;

@@ -280,6 +281,7 @@ impl Queue {
             status: Status::Enqueued,
             kind: kind.clone(),
             network: None,
+            custom_metadata,
         };
         // For deletion and cancelation tasks, we want to make extra sure that they
         // don't attempt to delete/cancel tasks that are newer than themselves.

@@ -344,6 +346,7 @@ impl Queue {
                 tasks: to_delete,
             },
             None,
+            None,
             false,
         )?;
 

@@ -25,6 +25,7 @@ use convert_case::{Case, Casing as _};
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::heed::{Env, WithoutTls};
 use meilisearch_types::milli;
+use meilisearch_types::milli::update::S3SnapshotOptions;
 use meilisearch_types::tasks::Status;
 use process_batch::ProcessBatchInfo;
 use rayon::current_num_threads;

@@ -87,11 +88,14 @@ pub struct Scheduler {
 
     /// Snapshot compaction status.
     pub(crate) experimental_no_snapshot_compaction: bool,
+
+    /// S3 Snapshot options.
+    pub(crate) s3_snapshot_options: Option<S3SnapshotOptions>,
 }
 
 impl Scheduler {
-    pub(crate) fn private_clone(&self) -> Scheduler {
-        Scheduler {
+    pub(crate) fn private_clone(&self) -> Self {
+        Self {
             must_stop_processing: self.must_stop_processing.clone(),
             wake_up: self.wake_up.clone(),
             autobatching_enabled: self.autobatching_enabled,

@@ -103,23 +107,52 @@ impl Scheduler {
             version_file_path: self.version_file_path.clone(),
             embedding_cache_cap: self.embedding_cache_cap,
             experimental_no_snapshot_compaction: self.experimental_no_snapshot_compaction,
+            s3_snapshot_options: self.s3_snapshot_options.clone(),
         }
     }
 
     pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
+        let IndexSchedulerOptions {
+            version_file_path,
+            auth_path: _,
+            tasks_path: _,
+            update_file_path: _,
+            indexes_path: _,
+            snapshots_path,
+            dumps_path,
+            cli_webhook_url: _,
+            cli_webhook_authorization: _,
+            task_db_size: _,
+            index_base_map_size: _,
+            enable_mdb_writemap: _,
+            index_growth_amount: _,
+            index_count: _,
+            indexer_config,
+            autobatching_enabled,
+            cleanup_enabled: _,
+            max_number_of_tasks: _,
+            max_number_of_batched_tasks,
+            batched_tasks_size_limit,
+            instance_features: _,
+            auto_upgrade: _,
+            embedding_cache_cap,
+            experimental_no_snapshot_compaction,
+        } = options;
+
         Scheduler {
             must_stop_processing: MustStopProcessing::default(),
             // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
             wake_up: Arc::new(SignalEvent::auto(true)),
-            autobatching_enabled: options.autobatching_enabled,
-            max_number_of_batched_tasks: options.max_number_of_batched_tasks,
-            batched_tasks_size_limit: options.batched_tasks_size_limit,
-            dumps_path: options.dumps_path.clone(),
-            snapshots_path: options.snapshots_path.clone(),
+            autobatching_enabled: *autobatching_enabled,
+            max_number_of_batched_tasks: *max_number_of_batched_tasks,
+            batched_tasks_size_limit: *batched_tasks_size_limit,
+            dumps_path: dumps_path.clone(),
+            snapshots_path: snapshots_path.clone(),
             auth_env,
-            version_file_path: options.version_file_path.clone(),
-            embedding_cache_cap: options.embedding_cache_cap,
-            experimental_no_snapshot_compaction: options.experimental_no_snapshot_compaction,
+            version_file_path: version_file_path.clone(),
+            embedding_cache_cap: *embedding_cache_cap,
+            experimental_no_snapshot_compaction: *experimental_no_snapshot_compaction,
+            s3_snapshot_options: indexer_config.s3_snapshot_options.clone(),
         }
     }
 }

@@ -12,6 +12,8 @@ use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
 use crate::queue::TaskQueue;
 use crate::{Error, IndexScheduler, Result};
 
+const UPDATE_FILES_DIR_NAME: &str = "update_files";
+
 /// # Safety
 ///
 /// See [`EnvOpenOptions::open`].

@@ -78,10 +80,32 @@ impl IndexScheduler {
     pub(super) fn process_snapshot(
         &self,
         progress: Progress,
-        mut tasks: Vec<Task>,
+        tasks: Vec<Task>,
     ) -> Result<Vec<Task>> {
         progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
 
+        match self.scheduler.s3_snapshot_options.clone() {
+            Some(options) => {
+                #[cfg(not(unix))]
+                {
+                    let _ = options;
+                    panic!("Non-unix platform does not support S3 snapshotting");
+                }
+                #[cfg(unix)]
+                self.runtime
+                    .as_ref()
+                    .expect("Runtime not initialized")
+                    .block_on(self.process_snapshot_to_s3(progress, options, tasks))
+            }
+            None => self.process_snapshots_to_disk(progress, tasks),
+        }
+    }
+
+    fn process_snapshots_to_disk(
+        &self,
+        progress: Progress,
+        mut tasks: Vec<Task>,
+    ) -> Result<Vec<Task>, Error> {
         fs::create_dir_all(&self.scheduler.snapshots_path)?;
         let temp_snapshot_dir = tempfile::tempdir()?;
 

@@ -128,7 +152,7 @@ impl IndexScheduler {
         let rtxn = self.env.read_txn()?;
 
         // 2.4 Create the update files directory
-        let update_files_dir = temp_snapshot_dir.path().join("update_files");
+        let update_files_dir = temp_snapshot_dir.path().join(UPDATE_FILES_DIR_NAME);
         fs::create_dir_all(&update_files_dir)?;
 
         // 2.5 Only copy the update files of the enqueued tasks

@@ -140,7 +164,7 @@ impl IndexScheduler {
             let task =
                 self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
             if let Some(content_uuid) = task.content_uuid() {
-                let src = self.queue.file_store.get_update_path(content_uuid);
+                let src = self.queue.file_store.update_path(content_uuid);
                 let dst = update_files_dir.join(content_uuid.to_string());
                 fs::copy(src, dst)?;
             }

@@ -206,4 +230,403 @@ impl IndexScheduler {
 
         Ok(tasks)
     }
+
+    #[cfg(unix)]
+    pub(super) async fn process_snapshot_to_s3(
+        &self,
+        progress: Progress,
+        opts: meilisearch_types::milli::update::S3SnapshotOptions,
+        mut tasks: Vec<Task>,
+    ) -> Result<Vec<Task>> {
+        use meilisearch_types::milli::update::S3SnapshotOptions;
+
+        let S3SnapshotOptions {
+            s3_bucket_url,
+            s3_bucket_region,
+            s3_bucket_name,
+            s3_snapshot_prefix,
+            s3_access_key,
+            s3_secret_key,
+            s3_max_in_flight_parts,
+            s3_compression_level: level,
+            s3_signature_duration,
+            s3_multipart_part_size,
+        } = opts;
+
+        let must_stop_processing = self.scheduler.must_stop_processing.clone();
+        let retry_backoff = backoff::ExponentialBackoff::default();
+        let db_name = {
+            let mut base_path = self.env.path().to_owned();
+            base_path.pop();
+            base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms").to_string()
+        };
+
+        let (reader, writer) = std::io::pipe()?;
+        let uploader_task = tokio::spawn(multipart_stream_to_s3(
+            s3_bucket_url,
+            s3_bucket_region,
+            s3_bucket_name,
+            s3_snapshot_prefix,
+            s3_access_key,
+            s3_secret_key,
+            s3_max_in_flight_parts,
+            s3_signature_duration,
+            s3_multipart_part_size,
+            must_stop_processing,
+            retry_backoff,
+            db_name,
+            reader,
+        ));
+
+        let index_scheduler = IndexScheduler::private_clone(self);
+        let builder_task = tokio::task::spawn_blocking(move || {
+            stream_tarball_into_pipe(progress, level, writer, index_scheduler)
+        });
+
+        let (uploader_result, builder_result) = tokio::join!(uploader_task, builder_task);
+
+        // Check uploader result first to early return on task abortion.
+        // safety: JoinHandle can return an error if the task was aborted, cancelled, or panicked.
+        uploader_result.unwrap()?;
+        builder_result.unwrap()?;
+
+        for task in &mut tasks {
+            task.status = Status::Succeeded;
+        }
+
+        Ok(tasks)
+    }
+}
+
+/// Streams a tarball of the database content into a pipe.
+#[cfg(unix)]
+fn stream_tarball_into_pipe(
+    progress: Progress,
+    level: u32,
+    writer: std::io::PipeWriter,
+    index_scheduler: IndexScheduler,
+) -> std::result::Result<(), Error> {
+    use std::io::Write as _;
+    use std::path::Path;
+
+    let writer = flate2::write::GzEncoder::new(writer, flate2::Compression::new(level));
+    let mut tarball = tar::Builder::new(writer);
+
+    // 1. Snapshot the version file
+    tarball
+        .append_path_with_name(&index_scheduler.scheduler.version_file_path, VERSION_FILE_NAME)?;
+
+    // 2. Snapshot the index scheduler LMDB env
+    progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
+    let tasks_env_file = index_scheduler.env.try_clone_inner_file()?;
+    let path = Path::new("tasks").join("data.mdb");
+    append_file_to_tarball(&mut tarball, path, tasks_env_file)?;
+
+    // 2.3 Create a read transaction on the index-scheduler
+    let rtxn = index_scheduler.env.read_txn()?;
+
+    // 2.4 Create the update files directory
+    // And only copy the update files of the enqueued tasks
+    progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
+    let enqueued = index_scheduler.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
+    let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
+    progress.update_progress(update_file_progress);
+
+    // We create the update_files directory so that it
+    // always exists even if there are no update files
+    let update_files_dir = Path::new(UPDATE_FILES_DIR_NAME);
+    let src_update_files_dir = {
+        let mut path = index_scheduler.env.path().to_path_buf();
+        path.pop();
+        path.join(UPDATE_FILES_DIR_NAME)
+    };
+    tarball.append_dir(update_files_dir, src_update_files_dir)?;
+
+    for task_id in enqueued {
+        let task = index_scheduler
+            .queue
+            .tasks
+            .get_task(&rtxn, task_id)?
+            .ok_or(Error::CorruptedTaskQueue)?;
+        if let Some(content_uuid) = task.content_uuid() {
+            use std::fs::File;
+
+            let src = index_scheduler.queue.file_store.update_path(content_uuid);
+            let mut update_file = File::open(src)?;
+            let path = update_files_dir.join(content_uuid.to_string());
+            tarball.append_file(path, &mut update_file)?;
+        }
+        atomic.fetch_add(1, Ordering::Relaxed);
+    }
+
+    // 3. Snapshot every indexes
+    progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
+    let index_mapping = index_scheduler.index_mapper.index_mapping;
+    let nb_indexes = index_mapping.len(&rtxn)? as u32;
+    let indexes_dir = Path::new("indexes");
+    let indexes_references: Vec<_> = index_scheduler
+        .index_mapper
+        .index_mapping
+        .iter(&rtxn)?
+        .map(|res| res.map_err(Error::from).map(|(name, uuid)| (name.to_string(), uuid)))
+        .collect::<Result<_, Error>>()?;
+
+    // It's prettier to use a for loop instead of the IndexMapper::try_for_each_index
+    // method, especially when we need to access the UUID, local path and index number.
+    for (i, (name, uuid)) in indexes_references.into_iter().enumerate() {
+        progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
+            &name, i as u32, nb_indexes,
+        ));
+        let path = indexes_dir.join(uuid.to_string()).join("data.mdb");
+        let index = index_scheduler.index_mapper.index(&rtxn, &name)?;
+        let index_file = index.try_clone_inner_file()?;
+        tracing::trace!("Appending index file for {name} in {}", path.display());
+        append_file_to_tarball(&mut tarball, path, index_file)?;
+    }
+
+    drop(rtxn);
+
+    // 4. Snapshot the auth LMDB env
+    progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
+    let auth_env_file = index_scheduler.scheduler.auth_env.try_clone_inner_file()?;
+    let path = Path::new("auth").join("data.mdb");
+    append_file_to_tarball(&mut tarball, path, auth_env_file)?;
+
+    let mut gzencoder = tarball.into_inner()?;
+    gzencoder.flush()?;
+    gzencoder.try_finish()?;
+    let mut writer = gzencoder.finish()?;
+    writer.flush()?;
+
+    Result::<_, Error>::Ok(())
+}
+
+#[cfg(unix)]
+fn append_file_to_tarball<W, P>(
+    tarball: &mut tar::Builder<W>,
+    path: P,
+    mut auth_env_file: fs::File,
+) -> Result<(), Error>
+where
+    W: std::io::Write,
+    P: AsRef<std::path::Path>,
+{
+    use std::io::{Seek as _, SeekFrom};
+
+    // Note: A previous snapshot operation may have left the cursor
+    // at the end of the file so we need to seek to the start.
+    auth_env_file.seek(SeekFrom::Start(0))?;
+    tarball.append_file(path, &mut auth_env_file)?;
+    Ok(())
+}
+
+/// Streams the content read from the given reader to S3.
+#[cfg(unix)]
+#[allow(clippy::too_many_arguments)]
+async fn multipart_stream_to_s3(
+    s3_bucket_url: String,
+    s3_bucket_region: String,
+    s3_bucket_name: String,
+    s3_snapshot_prefix: String,
+    s3_access_key: String,
+    s3_secret_key: String,
+    s3_max_in_flight_parts: std::num::NonZero<usize>,
+    s3_signature_duration: std::time::Duration,
+    s3_multipart_part_size: u64,
+    must_stop_processing: super::MustStopProcessing,
+    retry_backoff: backoff::exponential::ExponentialBackoff<backoff::SystemClock>,
+    db_name: String,
+    reader: std::io::PipeReader,
+) -> Result<(), Error> {
+    use std::{collections::VecDeque, os::fd::OwnedFd, path::PathBuf};
+
+    use bytes::{Bytes, BytesMut};
+    use reqwest::{Client, Response};
+    use rusty_s3::S3Action as _;
+    use rusty_s3::{actions::CreateMultipartUpload, Bucket, BucketError, Credentials, UrlStyle};
+    use tokio::task::JoinHandle;
+
+    let reader = OwnedFd::from(reader);
+    let reader = tokio::net::unix::pipe::Receiver::from_owned_fd(reader)?;
+    let s3_snapshot_prefix = PathBuf::from(s3_snapshot_prefix);
+    let url =
+        s3_bucket_url.parse().map_err(BucketError::ParseError).map_err(Error::S3BucketError)?;
+    let bucket = Bucket::new(url, UrlStyle::Path, s3_bucket_name, s3_bucket_region)
+        .map_err(Error::S3BucketError)?;
+    let credential = Credentials::new(s3_access_key, s3_secret_key);
+
+    // Note for the future (rust 1.91+): use with_added_extension, it's prettier
+    let object_path = s3_snapshot_prefix.join(format!("{db_name}.snapshot"));
+    // Note: It doesn't work on Windows and if a port to this platform is needed,
+    // use the slash-path crate or similar to get the correct path separator.
+    let object = object_path.display().to_string();
+
+    let action = bucket.create_multipart_upload(Some(&credential), &object);
+    let url = action.sign(s3_signature_duration);
+
+    let client = Client::new();
+    let resp = client.post(url).send().await.map_err(Error::S3HttpError)?;
+    let status = resp.status();
+
+    let body = match resp.error_for_status_ref() {
+        Ok(_) => resp.text().await.map_err(Error::S3HttpError)?,
+        Err(_) => {
+            return Err(Error::S3Error { status, body: resp.text().await.unwrap_or_default() })
+        }
+    };
+
+    let multipart =
+        CreateMultipartUpload::parse_response(&body).map_err(|e| Error::S3XmlError(Box::new(e)))?;
+    tracing::debug!("Starting the upload of the snapshot to {object}");
+
+    // We use this bumpalo for etags strings.
+    let bump = bumpalo::Bump::new();
+    let mut etags = Vec::<&str>::new();
+    let mut in_flight = VecDeque::<(JoinHandle<reqwest::Result<Response>>, Bytes)>::with_capacity(
+        s3_max_in_flight_parts.get(),
+    );
+
+    // Part numbers start at 1 and cannot be larger than 10k
+    for part_number in 1u16.. {
+        if must_stop_processing.get() {
+            return Err(Error::AbortedTask);
+        }
+
+        let part_upload =
+            bucket.upload_part(Some(&credential), &object, part_number, multipart.upload_id());
+        let url = part_upload.sign(s3_signature_duration);
+
+        // Wait for a buffer to be ready if there are in-flight parts that landed
+        let mut buffer = if in_flight.len() >= s3_max_in_flight_parts.get() {
+            let (handle, buffer) = in_flight.pop_front().expect("At least one in flight request");
+            let resp = join_and_map_error(handle).await?;
+            extract_and_append_etag(&bump, &mut etags, resp.headers())?;
+
+            let mut buffer = match buffer.try_into_mut() {
+                Ok(buffer) => buffer,
+                Err(_) => unreachable!("All bytes references were consumed in the task"),
+            };
+            buffer.clear();
+            buffer
+        } else {
+            BytesMut::with_capacity(s3_multipart_part_size as usize)
+        };
+
+        // If we successfully read enough bytes,
+        // we can continue and send the buffer/part
+        while buffer.len() < (s3_multipart_part_size as usize / 2) {
+            // Wait for the pipe to be readable
+
+            use std::io;
+            reader.readable().await?;
+
+            match reader.try_read_buf(&mut buffer) {
+                Ok(0) => break,
+                // We read some bytes but maybe not enough
+                Ok(_) => continue,
+                // The readiness event is a false positive.
+                Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => continue,
+                Err(e) => return Err(e.into()),
+            }
+        }
+
+        if buffer.is_empty() {
+            // Break the loop if the buffer is
+            // empty after we tried to read bytes
+            break;
+        }
+
+        let body = buffer.freeze();
+        tracing::trace!("Sending part {part_number}");
+        let task = tokio::spawn({
+            let client = client.clone();
+            let body = body.clone();
+            backoff::future::retry(retry_backoff.clone(), move || {
+                let client = client.clone();
+                let url = url.clone();
+                let body = body.clone();
+                async move {
+                    match client.put(url).body(body).send().await {
+                        Ok(resp) if resp.status().is_client_error() => {
+                            resp.error_for_status().map_err(backoff::Error::Permanent)
+                        }
+                        Ok(resp) => Ok(resp),
+                        Err(e) => Err(backoff::Error::transient(e)),
+                    }
+                }
+            })
+        });
+        in_flight.push_back((task, body));
+    }
+
+    for (handle, _buffer) in in_flight {
+        let resp = join_and_map_error(handle).await?;
+        extract_and_append_etag(&bump, &mut etags, resp.headers())?;
+    }
+
+    tracing::debug!("Finalizing the multipart upload");
+
+    let action = bucket.complete_multipart_upload(
+        Some(&credential),
+        &object,
+        multipart.upload_id(),
+        etags.iter().map(AsRef::as_ref),
+    );
+    let url = action.sign(s3_signature_duration);
+    let body = action.body();
+    let resp = backoff::future::retry(retry_backoff, move || {
+        let client = client.clone();
+        let url = url.clone();
+        let body = body.clone();
+        async move {
+            match client.post(url).body(body).send().await {
+                Ok(resp) if resp.status().is_client_error() => {
+                    resp.error_for_status().map_err(backoff::Error::Permanent)
+                }
+                Ok(resp) => Ok(resp),
+                Err(e) => Err(backoff::Error::transient(e)),
+            }
+        }
+    })
+    .await
+    .map_err(Error::S3HttpError)?;
+
+    let status = resp.status();
+    let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;
+    if status.is_success() {
+        Ok(())
+    } else {
+        Err(Error::S3Error { status, body })
+    }
+}
+
+#[cfg(unix)]
+async fn join_and_map_error(
+    join_handle: tokio::task::JoinHandle<Result<reqwest::Response, reqwest::Error>>,
+) -> Result<reqwest::Response> {
+    // safety: Panic happens if the task (JoinHandle) was aborted, cancelled, or panicked
+    let request = join_handle.await.unwrap();
+    let resp = request.map_err(Error::S3HttpError)?;
+    match resp.error_for_status_ref() {
+        Ok(_) => Ok(resp),
+        Err(_) => Err(Error::S3Error {
+            status: resp.status(),
+            body: resp.text().await.unwrap_or_default(),
+        }),
+    }
+}
+
+#[cfg(unix)]
+fn extract_and_append_etag<'b>(
+    bump: &'b bumpalo::Bump,
+    etags: &mut Vec<&'b str>,
+    headers: &reqwest::header::HeaderMap,
+) -> Result<()> {
+    use reqwest::header::ETAG;
+
+    let etag = headers.get(ETAG).ok_or_else(|| Error::S3XmlError("Missing ETag header".into()))?;
+    let etag = etag.to_str().map_err(|e| Error::S3XmlError(Box::new(e)))?;
+    etags.push(bump.alloc_str(etag));
+
+    Ok(())
 }
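For reference, the whole uploader above is driven by the `S3SnapshotOptions` value carried on the scheduler. A rough sketch of what a populated options value could look like, using only the field names destructured in the diff; the concrete values, and whether the struct is normally built by hand like this rather than parsed from CLI options, are assumptions for illustration only:

    use std::num::NonZero;
    use std::time::Duration;
    use meilisearch_types::milli::update::S3SnapshotOptions;

    // Hypothetical values; in practice these come from configuration.
    let opts = S3SnapshotOptions {
        s3_bucket_url: "https://s3.eu-west-1.amazonaws.com".to_string(),
        s3_bucket_region: "eu-west-1".to_string(),
        s3_bucket_name: "my-meili-snapshots".to_string(),
        s3_snapshot_prefix: "prod/cluster-a".to_string(),
        s3_access_key: "AKIA-example".to_string(),
        s3_secret_key: "example-secret".to_string(),
        s3_max_in_flight_parts: NonZero::new(4).unwrap(), // concurrent part uploads
        s3_compression_level: 6,                          // gzip level handed to flate2
        s3_signature_duration: Duration::from_secs(3600), // validity of each presigned URL
        s3_multipart_part_size: 250 * 1024 * 1024,        // bytes per multipart part
    };

Note that each part upload is signed individually with `sign(s3_signature_duration)`, so the duration only needs to cover a single part transfer rather than the whole snapshot.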
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}

@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
 [timestamp] [4,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
 2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
 3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:

@@ -37,7 +37,7 @@ catto [1,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 ----------------------------------------------------------------------

@@ -40,7 +40,7 @@ doggo [2,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 24, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}

@@ -43,7 +43,7 @@ doggo [2,3,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.24.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
@@ -126,7 +126,7 @@ impl IndexScheduler {
         std::fs::create_dir_all(&options.auth_path).unwrap();
         let auth_env = open_auth_store_env(&options.auth_path).unwrap();
         let index_scheduler =
-            Self::new(options, auth_env, version, sender, planned_failures).unwrap();
+            Self::new_test(options, auth_env, version, None, sender, planned_failures).unwrap();
 
         // To be 100% consistent between all test we're going to start the scheduler right now
         // and ensure it's in the expected starting state.

@@ -48,6 +48,8 @@ pub fn upgrade_index_scheduler(
         (1, 22, _) => 0,
         (1, 23, _) => 0,
         (1, 24, _) => 0,
+        (1, 25, _) => 0,
+        (1, 26, _) => 0,
         (major, minor, patch) => {
             if major > current_major
                 || (major == current_major && minor > current_minor)

@@ -98,6 +100,7 @@ pub fn upgrade_index_scheduler(
             status: Status::Enqueued,
             kind: KindWithContent::UpgradeDatabase { from },
             network: None,
+            custom_metadata: None,
         },
     )?;
     wtxn.commit()?;

@@ -379,6 +379,7 @@ impl crate::IndexScheduler {
                 status,
                 kind,
                 network: _,
+                custom_metadata: _,
             } = task;
             assert_eq!(uid, task.uid);
             if task.status != Status::Enqueued {

@@ -254,10 +254,12 @@ InvalidSearchHybridQuery , InvalidRequest , BAD_REQU
 InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
 InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
 InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
+InvalidIndexCustomMetadata , InvalidRequest , BAD_REQUEST ;
 InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchQueryPersonalization , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ;

@@ -315,6 +317,8 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
 InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
 InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
 InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
+InvalidSearchPersonalize , InvalidRequest , BAD_REQUEST ;
+InvalidSearchPersonalizeUserContext , InvalidRequest , BAD_REQUEST ;
 InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;

@@ -390,6 +394,9 @@ TooManyVectors , InvalidRequest , BAD_REQU
 UnretrievableDocument , Internal , BAD_REQUEST ;
 UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
 UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
+InvalidS3SnapshotRequest , Internal , BAD_REQUEST ;
+InvalidS3SnapshotParameters , Internal , BAD_REQUEST ;
+S3SnapshotServerError , Internal , BAD_GATEWAY ;
 
 // Experimental features
 VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;

@@ -679,6 +686,18 @@ impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
     }
 }
 
+impl fmt::Display for deserr_codes::InvalidSearchPersonalize {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "the value of `personalize` is invalid, expected a JSON object with `userContext` string.")
+    }
+}
+
+impl fmt::Display for deserr_codes::InvalidSearchPersonalizeUserContext {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "the value of `userContext` is invalid, expected a string.")
+    }
+}
+
 #[macro_export]
 macro_rules! internal_error {
     ($target:ty : $($other:path), *) => {
@@ -346,24 +346,26 @@ impl<T> Settings<T> {
                 continue;
             };
 
-            Self::hide_secret(api_key);
+            hide_secret(api_key, 0);
         }
     }
+}
 
-    fn hide_secret(secret: &mut String) {
-        match secret.len() {
-            x if x < 10 => {
-                secret.replace_range(.., "XXX...");
-            }
-            x if x < 20 => {
-                secret.replace_range(2.., "XXXX...");
-            }
-            x if x < 30 => {
-                secret.replace_range(3.., "XXXXX...");
-            }
-            _x => {
-                secret.replace_range(5.., "XXXXXX...");
-            }
+/// Redact a secret string, starting from the `secret_offset`th byte.
+pub fn hide_secret(secret: &mut String, secret_offset: usize) {
+    match secret.len().checked_sub(secret_offset) {
+        None => (),
+        Some(x) if x < 10 => {
+            secret.replace_range(secret_offset.., "XXX...");
+        }
+        Some(x) if x < 20 => {
+            secret.replace_range((secret_offset + 2).., "XXXX...");
+        }
+        Some(x) if x < 30 => {
+            secret.replace_range((secret_offset + 3).., "XXXXX...");
+        }
+        Some(_x) => {
+            secret.replace_range((secret_offset + 5).., "XXXXXX...");
         }
     }
 }
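The new offset parameter lets callers keep a known prefix readable while the remainder of the secret is masked; how many leading bytes survive still depends on how much material follows the offset. A quick illustration of the expected behaviour, derived only from the match arms above (the sample strings are made up):

    // 16 bytes after offset 0: falls in the "< 20" arm, keep 2 bytes then mask.
    let mut api_key = String::from("0123456789abcdef");
    hide_secret(&mut api_key, 0);
    assert_eq!(api_key, "01XXXX...");

    // Same secret behind a 7-byte "Bearer " prefix: the rule shifts by the offset.
    let mut header = String::from("Bearer 0123456789abcdef");
    hide_secret(&mut header, "Bearer ".len());
    assert_eq!(header, "Bearer 01XXXX...");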
@@ -55,6 +55,9 @@ pub struct TaskView {
 
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub network: Option<TaskNetwork>,
+
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub custom_metadata: Option<String>,
 }
 
 impl TaskView {

@@ -73,6 +76,7 @@ impl TaskView {
             started_at: task.started_at,
             finished_at: task.finished_at,
             network: task.network.clone(),
+            custom_metadata: task.custom_metadata.clone(),
         }
     }
 }

@@ -45,6 +45,9 @@ pub struct Task {
 
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub network: Option<TaskNetwork>,
+
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub custom_metadata: Option<String>,
 }
 
 impl Task {

@@ -11,6 +11,24 @@ pub struct Webhook {
     pub headers: BTreeMap<String, String>,
 }
 
+impl Webhook {
+    pub fn redact_authorization_header(&mut self) {
+        // headers are case insensitive, so to make the redaction robust we iterate over qualifying headers
+        // rather than getting one canonical `Authorization` header.
+        for value in self
+            .headers
+            .iter_mut()
+            .filter_map(|(name, value)| name.eq_ignore_ascii_case("authorization").then_some(value))
+        {
+            if value.starts_with("Bearer ") {
+                crate::settings::hide_secret(value, "Bearer ".len());
+            } else {
+                crate::settings::hide_secret(value, 0);
+            }
+        }
+    }
+}
+
 #[derive(Debug, Serialize, Default, Clone, PartialEq)]
 #[serde(rename_all = "camelCase")]
 pub struct WebhooksView {
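Because header names are matched case-insensitively, the redaction catches `authorization`, `Authorization`, or any other casing, and the `Bearer ` prefix is preserved so operators can still tell which scheme was configured. A small usage sketch, assuming a mutable `webhook: Webhook` built elsewhere (its other fields are not shown in this hunk, so construction is omitted and the token is a placeholder):

    // Hypothetical value; only the header map matters for this method.
    webhook.headers.insert(
        "Authorization".to_string(),
        "Bearer super-secret-token-value".to_string(),
    );
    webhook.redact_authorization_header();
    // The scheme prefix survives; the token itself is masked by `hide_secret`.
    assert_eq!(webhook.headers["Authorization"], "Bearer supXXXXX...");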
|
@@ -208,6 +208,7 @@ struct Infos {
     experimental_no_edition_2024_for_prefix_post_processing: bool,
     experimental_no_edition_2024_for_facet_post_processing: bool,
     experimental_vector_store_setting: bool,
+    experimental_personalization: bool,
     gpu_enabled: bool,
     db_path: bool,
     import_dump: bool,
@@ -217,6 +218,7 @@ struct Infos {
     import_snapshot: bool,
     schedule_snapshot: Option<u64>,
     snapshot_dir: bool,
+    uses_s3_snapshots: bool,
     ignore_missing_snapshot: bool,
     ignore_snapshot_if_db_exists: bool,
     http_addr: bool,
@@ -285,6 +287,8 @@ impl Infos {
             indexer_options,
             config_file_path,
             no_analytics: _,
+            experimental_personalization_api_key,
+            s3_snapshot_options,
         } = options;
 
         let schedule_snapshot = match schedule_snapshot {
@@ -348,6 +352,7 @@ impl Infos {
             import_snapshot: import_snapshot.is_some(),
             schedule_snapshot,
             snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
+            uses_s3_snapshots: s3_snapshot_options.is_some(),
             ignore_missing_snapshot,
             ignore_snapshot_if_db_exists,
             http_addr: http_addr != default_http_addr(),
@@ -371,6 +376,7 @@ impl Infos {
             experimental_no_edition_2024_for_settings,
             experimental_no_edition_2024_for_prefix_post_processing,
             experimental_no_edition_2024_for_facet_post_processing,
+            experimental_personalization: experimental_personalization_api_key.is_some(),
         }
     }
 }
@@ -38,6 +38,8 @@ pub enum MeilisearchHttpError {
     PaginationInFederatedQuery(usize, &'static str),
     #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")]
     FacetsInFederatedQuery(usize, String, Vec<String>),
+    #[error("Inside `.queries[{0}]`: Using `.personalize` is not allowed in federated queries.\n - Hint: remove `personalize` from query #{0} or remove `federation` from the request")]
+    PersonalizationInFederatedQuery(usize),
     #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")]
     InconsistentFacetOrder {
         facet: String,
@@ -137,6 +139,9 @@ impl ErrorCode for MeilisearchHttpError {
             MeilisearchHttpError::InconsistentFacetOrder { .. } => {
                 Code::InvalidMultiSearchFacetOrder
             }
+            MeilisearchHttpError::PersonalizationInFederatedQuery(_) => {
+                Code::InvalidMultiSearchQueryPersonalization
+            }
             MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
                 Code::InconsistentDocumentChangeHeaders
             }
@@ -11,6 +11,7 @@ pub mod middleware;
 pub mod option;
 #[cfg(test)]
 mod option_test;
+pub mod personalization;
 pub mod routes;
 pub mod search;
 pub mod search_queue;
@@ -58,6 +59,7 @@ use tracing::{error, info_span};
 use tracing_subscriber::filter::Targets;
 
 use crate::error::MeilisearchHttpError;
+use crate::personalization::PersonalizationService;
 
 /// Default number of simultaneously opened indexes.
 ///
@@ -128,12 +130,8 @@ pub type LogStderrType = tracing_subscriber::filter::Filtered<
 >;
 
 pub fn create_app(
-    index_scheduler: Data<IndexScheduler>,
-    auth_controller: Data<AuthController>,
-    search_queue: Data<SearchQueue>,
+    services: ServicesData,
     opt: Opt,
-    logs: (LogRouteHandle, LogStderrHandle),
-    analytics: Data<Analytics>,
     enable_dashboard: bool,
 ) -> actix_web::App<
     impl ServiceFactory<
@@ -145,17 +143,7 @@ pub fn create_app(
     >,
 > {
     let app = actix_web::App::new()
-        .configure(|s| {
-            configure_data(
-                s,
-                index_scheduler.clone(),
-                auth_controller.clone(),
-                search_queue.clone(),
-                &opt,
-                logs,
-                analytics.clone(),
-            )
-        })
+        .configure(|s| configure_data(s, services, &opt))
         .configure(routes::configure)
         .configure(|s| dashboard(s, enable_dashboard));
 
@@ -216,7 +204,10 @@ enum OnFailure {
     KeepDb,
 }
 
-pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
+pub fn setup_meilisearch(
+    opt: &Opt,
+    handle: tokio::runtime::Handle,
+) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
     let index_scheduler_opt = IndexSchedulerOptions {
         version_file_path: opt.db_path.join(VERSION_FILE_NAME),
         auth_path: opt.db_path.join("auth"),
@@ -230,7 +221,11 @@ pub fn setup_meilisearch(
         task_db_size: opt.max_task_db_size.as_u64() as usize,
         index_base_map_size: opt.max_index_size.as_u64() as usize,
         enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
-        indexer_config: Arc::new((&opt.indexer_options).try_into()?),
+        indexer_config: Arc::new({
+            let s3_snapshot_options =
+                opt.s3_snapshot_options.clone().map(|opt| opt.try_into()).transpose()?;
+            IndexerConfig { s3_snapshot_options, ..(&opt.indexer_options).try_into()? }
+        }),
         autobatching_enabled: true,
         cleanup_enabled: !opt.experimental_replication_parameters,
         max_number_of_tasks: 1_000_000,
@@ -256,6 +251,7 @@ pub fn setup_meilisearch(
             index_scheduler_opt,
             OnFailure::RemoveDb,
             binary_version, // the db is empty
+            handle,
         )?,
         Err(e) => {
             std::fs::remove_dir_all(&opt.db_path)?;
@@ -273,7 +269,7 @@ pub fn setup_meilisearch(
             bail!("snapshot doesn't exist at {}", snapshot_path.display())
         // the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
         } else {
-            open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
+            open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
         }
     } else if let Some(ref path) = opt.import_dump {
         let src_path_exists = path.exists();
@@ -284,6 +280,7 @@ pub fn setup_meilisearch(
             index_scheduler_opt,
             OnFailure::RemoveDb,
             binary_version, // the db is empty
+            handle,
         )?;
         match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
             Ok(()) => (index_scheduler, auth_controller),
@@ -304,10 +301,10 @@ pub fn setup_meilisearch(
         // the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
         // or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
         } else {
-            open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
+            open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
         }
     } else {
-        open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
+        open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
     };
 
     // We create a loop in a thread that registers snapshotCreation tasks
@@ -338,6 +335,7 @@ fn open_or_create_database_unchecked(
     index_scheduler_opt: IndexSchedulerOptions,
     on_failure: OnFailure,
     version: (u32, u32, u32),
+    handle: tokio::runtime::Handle,
 ) -> anyhow::Result<(IndexScheduler, AuthController)> {
     // we don't want to create anything in the data.ms yet, thus we
     // wrap our two builders in a closure that'll be executed later.
@@ -345,7 +343,7 @@ fn open_or_create_database_unchecked(
     let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap();
     let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key);
     let index_scheduler_builder = || -> anyhow::Result<_> {
-        Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
+        Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version, Some(handle))?)
     };
 
     match (
@@ -452,6 +450,7 @@ fn open_or_create_database(
     index_scheduler_opt: IndexSchedulerOptions,
     empty_db: bool,
     binary_version: (u32, u32, u32),
+    handle: tokio::runtime::Handle,
 ) -> anyhow::Result<(IndexScheduler, AuthController)> {
     let version = if !empty_db {
         check_version(opt, &index_scheduler_opt, binary_version)?
@@ -459,7 +458,7 @@ fn open_or_create_database(
         binary_version
     };
 
-    open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version)
+    open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version, handle)
 }
 
 fn import_dump(
@@ -527,7 +526,11 @@ fn import_dump(
     let indexer_config = if base_config.max_threads.is_none() {
         let (thread_pool, _) = default_thread_pool_and_threads();
 
-        let _config = IndexerConfig { thread_pool, ..*base_config };
+        let _config = IndexerConfig {
+            thread_pool,
+            s3_snapshot_options: base_config.s3_snapshot_options.clone(),
+            ..*base_config
+        };
         backup_config = _config;
         &backup_config
     } else {
@@ -675,23 +678,26 @@ fn import_dump(
     Ok(index_scheduler_dump.finish()?)
 }
 
-pub fn configure_data(
-    config: &mut web::ServiceConfig,
-    index_scheduler: Data<IndexScheduler>,
-    auth: Data<AuthController>,
-    search_queue: Data<SearchQueue>,
-    opt: &Opt,
-    (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
-    analytics: Data<Analytics>,
-) {
+pub fn configure_data(config: &mut web::ServiceConfig, services: ServicesData, opt: &Opt) {
+    let ServicesData {
+        index_scheduler,
+        auth,
+        search_queue,
+        personalization_service,
+        logs_route_handle,
+        logs_stderr_handle,
+        analytics,
+    } = services;
+
     let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
     config
         .app_data(index_scheduler)
         .app_data(auth)
         .app_data(search_queue)
         .app_data(analytics)
-        .app_data(web::Data::new(logs_route))
-        .app_data(web::Data::new(logs_stderr))
+        .app_data(personalization_service)
+        .app_data(logs_route_handle)
+        .app_data(logs_stderr_handle)
         .app_data(web::Data::new(opt.clone()))
         .app_data(
             web::JsonConfig::default()
@@ -752,3 +758,14 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
 pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
     config.service(web::resource("/").route(web::get().to(routes::running)));
 }
+
+#[derive(Clone)]
+pub struct ServicesData {
+    pub index_scheduler: Data<IndexScheduler>,
+    pub auth: Data<AuthController>,
+    pub search_queue: Data<SearchQueue>,
+    pub personalization_service: Data<PersonalizationService>,
+    pub logs_route_handle: Data<LogRouteHandle>,
+    pub logs_stderr_handle: Data<LogStderrHandle>,
+    pub analytics: Data<Analytics>,
+}
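Editor's note: the refactor replaces `create_app`'s long parameter list with one cloneable `ServicesData` bundle that the server's factory closure can clone per worker. A reduced plain-Rust sketch of that pattern, with illustrative names and without the actix types:

use std::sync::Arc;

// Each handle is cheap to clone (reference counted), so the bundle is too.
#[derive(Clone)]
struct Services {
    scheduler: Arc<String>,
    search_queue: Arc<Vec<u32>>,
}

fn create_app(services: Services, name: &str) -> String {
    format!("{name}: scheduler={}, queue_len={}", services.scheduler, services.search_queue.len())
}

fn main() {
    let services = Services {
        scheduler: Arc::new("index-scheduler".to_string()),
        search_queue: Arc::new(vec![1, 2, 3]),
    };

    // The server calls the factory once per worker; cloning the bundle clones
    // only the Arc handles, never the underlying services.
    let factory = move || create_app(services.clone(), "worker");
    let apps: Vec<String> = (0..2).map(|_| factory()).collect();
    assert_eq!(apps.len(), 2);
}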
@@ -14,10 +14,11 @@ use index_scheduler::IndexScheduler;
 use is_terminal::IsTerminal;
 use meilisearch::analytics::Analytics;
 use meilisearch::option::LogMode;
+use meilisearch::personalization::PersonalizationService;
 use meilisearch::search_queue::SearchQueue;
 use meilisearch::{
     analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
-    LogStderrType, Opt, SubscriberForSecondLayer,
+    LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
 };
 use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
@@ -76,7 +77,10 @@ fn on_panic(info: &std::panic::PanicHookInfo) {
 
 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
-    try_main().await.inspect_err(|error| {
+    // won't panic inside of tokio::main
+    let runtime = tokio::runtime::Handle::current();
+
+    try_main(runtime).await.inspect_err(|error| {
         tracing::error!(%error);
         let mut current = error.source();
         let mut depth = 0;
@@ -88,7 +92,7 @@ async fn main() -> anyhow::Result<()> {
     })
 }
 
-async fn try_main() -> anyhow::Result<()> {
+async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
     let (opt, config_read_from) = Opt::try_build()?;
 
     std::panic::set_hook(Box::new(on_panic));
@@ -122,7 +126,7 @@ async fn try_main() -> anyhow::Result<()> {
         _ => (),
     }
 
-    let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
+    let (index_scheduler, auth_controller) = setup_meilisearch(&opt, runtime)?;
 
     let analytics =
         analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await;
@@ -149,8 +153,15 @@ async fn run_http(
     let enable_dashboard = &opt.env == "development";
     let opt_clone = opt.clone();
     let index_scheduler = Data::from(index_scheduler);
-    let auth_controller = Data::from(auth_controller);
+    let auth = Data::from(auth_controller);
     let analytics = Data::from(analytics);
+    // Create personalization service with API key from options
+    let personalization_service = Data::new(
+        opt.experimental_personalization_api_key
+            .clone()
+            .map(PersonalizationService::cohere)
+            .unwrap_or_else(PersonalizationService::disabled),
+    );
     let search_queue = SearchQueue::new(
         opt.experimental_search_queue_size,
         available_parallelism()
@@ -162,21 +173,25 @@ async fn run_http(
         usize::from(opt.experimental_drop_search_after) as u64
     ));
     let search_queue = Data::new(search_queue);
+    let (logs_route_handle, logs_stderr_handle) = logs;
+    let logs_route_handle = Data::new(logs_route_handle);
+    let logs_stderr_handle = Data::new(logs_stderr_handle);
 
-    let http_server = HttpServer::new(move || {
-        create_app(
-            index_scheduler.clone(),
-            auth_controller.clone(),
-            search_queue.clone(),
-            opt.clone(),
-            logs.clone(),
-            analytics.clone(),
-            enable_dashboard,
-        )
-    })
-    // Disable signals allows the server to terminate immediately when a user enter CTRL-C
-    .disable_signals()
-    .keep_alive(KeepAlive::Os);
+    let services = ServicesData {
+        index_scheduler,
+        auth,
+        search_queue,
+        personalization_service,
+        logs_route_handle,
+        logs_stderr_handle,
+        analytics,
+    };
+
+    let http_server =
+        HttpServer::new(move || create_app(services.clone(), opt.clone(), enable_dashboard))
+            // Disable signals allows the server to terminate immediately when a user enter CTRL-C
+            .disable_signals()
+            .keep_alive(KeepAlive::Os);
 
     if let Some(config) = opt_clone.get_ssl_config()? {
         http_server.bind_rustls_0_23(opt_clone.http_addr, config)?.run().await?;
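Editor's note: `setup_meilisearch` now receives a `tokio::runtime::Handle` captured inside the async main, so synchronous setup code (and the scheduler it builds) can reach back into the runtime later. A minimal sketch of that hand-off, assuming only the `tokio` crate:

use tokio::runtime::Handle;

// Synchronous setup code: it cannot `.await`, but it can hold a Handle and
// give it to a plain thread that blocks on async work when needed.
fn setup(handle: Handle) -> std::thread::JoinHandle<u64> {
    std::thread::spawn(move || {
        // block_on is fine here because this thread is not a runtime worker.
        handle.block_on(async { 40 + 2 })
    })
}

#[tokio::main]
async fn main() {
    // Capture the current runtime's handle before calling into sync code.
    let handle = Handle::current();
    let worker = setup(handle);
    assert_eq!(worker.join().unwrap(), 42);
}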
@@ -114,4 +114,9 @@ lazy_static! {
         "Meilisearch Task Queue Size Until Stop Registering",
     ))
     .expect("Can't create a metric");
+    pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
+        "meilisearch_personalized_search_requests",
+        "Meilisearch number of search requests with personalization"
+    ))
+    .expect("Can't create a metric");
 }
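Editor's note: the new gauge is registered through the same `lazy_static!`/`register_int_gauge!` pattern as the existing metrics. A standalone sketch with the `prometheus` and `lazy_static` crates; the metric name below is made up for illustration:

use lazy_static::lazy_static;
use prometheus::{opts, register_int_gauge, IntGauge};

lazy_static! {
    // Registered once, on first access, against the default registry.
    static ref EXAMPLE_PERSONALIZED_REQUESTS: IntGauge = register_int_gauge!(opts!(
        "example_personalized_requests",
        "Number of example requests that used personalization"
    ))
    .expect("Can't create a metric");
}

fn main() {
    EXAMPLE_PERSONALIZED_REQUESTS.inc();
    assert_eq!(EXAMPLE_PERSONALIZED_REQUESTS.get(), 1);
}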
@@ -7,12 +7,13 @@ use std::ops::Deref;
 use std::path::PathBuf;
 use std::str::FromStr;
 use std::sync::Arc;
+use std::time::Duration;
 use std::{env, fmt, fs};
 
 use byte_unit::{Byte, ParseError, UnitType};
 use clap::Parser;
 use meilisearch_types::features::InstanceTogglableFeatures;
-use meilisearch_types::milli::update::IndexerConfig;
+use meilisearch_types::milli::update::{IndexerConfig, S3SnapshotOptions};
 use meilisearch_types::milli::ThreadPoolNoAbortBuilder;
 use rustls::server::{ServerSessionMemoryCache, WebPkiClientVerifier};
 use rustls::RootCertStore;
@@ -74,6 +75,22 @@ const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
 const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
 const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
     "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
+const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
+    "MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";
+
+// Related to S3 snapshots
+const MEILI_S3_BUCKET_URL: &str = "MEILI_S3_BUCKET_URL";
+const MEILI_S3_BUCKET_REGION: &str = "MEILI_S3_BUCKET_REGION";
+const MEILI_S3_BUCKET_NAME: &str = "MEILI_S3_BUCKET_NAME";
+const MEILI_S3_SNAPSHOT_PREFIX: &str = "MEILI_S3_SNAPSHOT_PREFIX";
+const MEILI_S3_ACCESS_KEY: &str = "MEILI_S3_ACCESS_KEY";
+const MEILI_S3_SECRET_KEY: &str = "MEILI_S3_SECRET_KEY";
+const MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS: &str = "MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS";
+const MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL: &str = "MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL";
+const MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS: &str =
+    "MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS";
+const MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE: &str = "MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE";
+
 const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
 const DEFAULT_DB_PATH: &str = "./data.ms";
 const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -83,6 +100,10 @@ const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
 const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
 const DEFAULT_SNAPSHOT_INTERVAL_SEC_STR: &str = "86400";
 const DEFAULT_DUMP_DIR: &str = "dumps/";
+const DEFAULT_S3_SNAPSHOT_MAX_IN_FLIGHT_PARTS: NonZeroUsize = NonZeroUsize::new(10).unwrap();
+const DEFAULT_S3_SNAPSHOT_COMPRESSION_LEVEL: u32 = 0;
+const DEFAULT_S3_SNAPSHOT_SIGNATURE_DURATION_SECONDS: u64 = 8 * 3600; // 8 hours
+const DEFAULT_S3_SNAPSHOT_MULTIPART_PART_SIZE: Byte = Byte::from_u64(375 * 1024 * 1024); // 375 MiB
 
 const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
 const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
@@ -475,10 +496,20 @@ pub struct Opt {
     #[serde(default)]
     pub experimental_no_snapshot_compaction: bool,
 
+    /// Experimental personalization API key feature.
+    ///
+    /// Sets the API key for personalization features.
+    #[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
+    pub experimental_personalization_api_key: Option<String>,
+
     #[serde(flatten)]
     #[clap(flatten)]
     pub indexer_options: IndexerOpts,
 
+    #[serde(flatten)]
+    #[clap(flatten)]
+    pub s3_snapshot_options: Option<S3SnapshotOpts>,
+
     /// Set the path to a configuration file that should be used to setup the engine.
     /// Format must be TOML.
     #[clap(long)]
@@ -580,6 +611,8 @@ impl Opt {
             experimental_limit_batched_tasks_total_size,
             experimental_embedding_cache_entries,
             experimental_no_snapshot_compaction,
+            experimental_personalization_api_key,
+            s3_snapshot_options,
         } = self;
         export_to_env_if_not_present(MEILI_DB_PATH, db_path);
         export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -680,7 +713,22 @@ impl Opt {
             MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
             experimental_no_snapshot_compaction.to_string(),
         );
+        if let Some(experimental_personalization_api_key) = experimental_personalization_api_key {
+            export_to_env_if_not_present(
+                MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY,
+                experimental_personalization_api_key,
+            );
+        }
         indexer_options.export_to_env();
+        if let Some(s3_snapshot_options) = s3_snapshot_options {
+            #[cfg(not(unix))]
+            {
+                let _ = s3_snapshot_options;
+                panic!("S3 snapshot options are not supported on Windows");
+            }
+            #[cfg(unix)]
+            s3_snapshot_options.export_to_env();
+        }
     }
 
     pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
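Editor's note: both the personalization key and the S3 block are pushed into the process environment only when the variable is not already set, so explicit `MEILI_*` variables keep precedence over the config file and CLI flags. A small sketch of that guard; the map stands in for the process environment and the helper name only mirrors the one in the diff:

use std::collections::BTreeMap;

// Stand-in for the process environment, to keep the sketch self-contained.
fn export_if_not_present(env: &mut BTreeMap<String, String>, key: &str, value: &str) {
    // Only fill the variable in; never overwrite what the user exported.
    env.entry(key.to_string()).or_insert_with(|| value.to_string());
}

fn main() {
    let mut env =
        BTreeMap::from([("MEILI_S3_BUCKET_NAME".to_string(), "from-shell".to_string())]);

    // Values coming from the config file lose against an explicit variable...
    export_if_not_present(&mut env, "MEILI_S3_BUCKET_NAME", "from-config");
    // ...but fill the gap when nothing was exported.
    export_if_not_present(&mut env, "MEILI_S3_SNAPSHOT_PREFIX", "snapshots/prod");

    assert_eq!(env["MEILI_S3_BUCKET_NAME"], "from-shell");
    assert_eq!(env["MEILI_S3_SNAPSHOT_PREFIX"], "snapshots/prod");
}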
@@ -849,6 +897,16 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
     type Error = anyhow::Error;
 
     fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
+        let IndexerOpts {
+            max_indexing_memory,
+            max_indexing_threads,
+            skip_index_budget,
+            experimental_no_edition_2024_for_settings,
+            experimental_no_edition_2024_for_dumps,
+            experimental_no_edition_2024_for_prefix_post_processing,
+            experimental_no_edition_2024_for_facet_post_processing,
+        } = other;
+
         let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
             .num_threads(other.max_indexing_threads.unwrap_or_else(|| num_cpus::get() / 2))
             .build()?;
@@ -856,21 +914,163 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
         Ok(Self {
             thread_pool,
             log_every_n: Some(DEFAULT_LOG_EVERY_N),
-            max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
-            max_threads: *other.max_indexing_threads,
+            max_memory: max_indexing_memory.map(|b| b.as_u64() as usize),
+            max_threads: max_indexing_threads.0,
             max_positions_per_attributes: None,
-            skip_index_budget: other.skip_index_budget,
-            experimental_no_edition_2024_for_settings: other
-                .experimental_no_edition_2024_for_settings,
-            experimental_no_edition_2024_for_dumps: other.experimental_no_edition_2024_for_dumps,
+            skip_index_budget: *skip_index_budget,
+            experimental_no_edition_2024_for_settings: *experimental_no_edition_2024_for_settings,
+            experimental_no_edition_2024_for_dumps: *experimental_no_edition_2024_for_dumps,
             chunk_compression_type: Default::default(),
             chunk_compression_level: Default::default(),
             documents_chunk_size: Default::default(),
             max_nb_chunks: Default::default(),
-            experimental_no_edition_2024_for_prefix_post_processing: other
-                .experimental_no_edition_2024_for_prefix_post_processing,
-            experimental_no_edition_2024_for_facet_post_processing: other
-                .experimental_no_edition_2024_for_facet_post_processing,
+            experimental_no_edition_2024_for_prefix_post_processing:
+                *experimental_no_edition_2024_for_prefix_post_processing,
+            experimental_no_edition_2024_for_facet_post_processing:
+                *experimental_no_edition_2024_for_facet_post_processing,
+            s3_snapshot_options: None,
+        })
+    }
+}
+
+#[derive(Debug, Clone, Parser, Deserialize)]
+// This group is a bit tricky but makes it possible to require all listed fields if one of them
+// is specified. It lets us keep an Option for the S3SnapshotOpts configuration.
+// <https://github.com/clap-rs/clap/issues/5092#issuecomment-2616986075>
+#[group(requires_all = ["s3_bucket_url", "s3_bucket_region", "s3_bucket_name", "s3_snapshot_prefix", "s3_access_key", "s3_secret_key"])]
+pub struct S3SnapshotOpts {
+    /// The S3 bucket URL in the format https://s3.<region>.amazonaws.com.
+    #[clap(long, env = MEILI_S3_BUCKET_URL, required = false)]
+    #[serde(default)]
+    pub s3_bucket_url: String,
+
+    /// The region in the format us-east-1.
+    #[clap(long, env = MEILI_S3_BUCKET_REGION, required = false)]
+    #[serde(default)]
+    pub s3_bucket_region: String,
+
+    /// The bucket name.
+    #[clap(long, env = MEILI_S3_BUCKET_NAME, required = false)]
+    #[serde(default)]
+    pub s3_bucket_name: String,
+
+    /// The prefix path where to put the snapshot, uses normal slashes (/).
+    #[clap(long, env = MEILI_S3_SNAPSHOT_PREFIX, required = false)]
+    #[serde(default)]
+    pub s3_snapshot_prefix: String,
+
+    /// The S3 access key.
+    #[clap(long, env = MEILI_S3_ACCESS_KEY, required = false)]
+    #[serde(default)]
+    pub s3_access_key: String,
+
+    /// The S3 secret key.
+    #[clap(long, env = MEILI_S3_SECRET_KEY, required = false)]
+    #[serde(default)]
+    pub s3_secret_key: String,
+
+    /// The maximum number of parts that can be uploaded in parallel.
+    ///
+    /// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
+    #[clap(long, env = MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS, default_value_t = default_experimental_s3_snapshot_max_in_flight_parts())]
+    #[serde(default = "default_experimental_s3_snapshot_max_in_flight_parts")]
+    pub experimental_s3_max_in_flight_parts: NonZeroUsize,
+
+    /// The compression level. Defaults to no compression (0).
+    ///
+    /// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
+    #[clap(long, env = MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL, default_value_t = default_experimental_s3_snapshot_compression_level())]
+    #[serde(default = "default_experimental_s3_snapshot_compression_level")]
+    pub experimental_s3_compression_level: u32,
+
+    /// The signature duration for the multipart upload.
+    ///
+    /// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
+    #[clap(long, env = MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS, default_value_t = default_experimental_s3_snapshot_signature_duration_seconds())]
+    #[serde(default = "default_experimental_s3_snapshot_signature_duration_seconds")]
+    pub experimental_s3_signature_duration_seconds: u64,
+
+    /// The size of the the multipart parts.
+    ///
+    /// Must not be less than 10MiB and larger than 8GiB. Yes,
+    /// twice the boundaries of the AWS S3 multipart upload
+    /// because we use it a bit differently internally.
+    ///
+    /// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
+    #[clap(long, env = MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE, default_value_t = default_experimental_s3_snapshot_multipart_part_size())]
+    #[serde(default = "default_experimental_s3_snapshot_multipart_part_size")]
+    pub experimental_s3_multipart_part_size: Byte,
+}
+
+impl S3SnapshotOpts {
+    /// Exports the values to their corresponding env vars if they are not set.
+    pub fn export_to_env(self) {
+        let S3SnapshotOpts {
+            s3_bucket_url,
+            s3_bucket_region,
+            s3_bucket_name,
+            s3_snapshot_prefix,
+            s3_access_key,
+            s3_secret_key,
+            experimental_s3_max_in_flight_parts,
+            experimental_s3_compression_level,
+            experimental_s3_signature_duration_seconds,
+            experimental_s3_multipart_part_size,
+        } = self;
+
+        export_to_env_if_not_present(MEILI_S3_BUCKET_URL, s3_bucket_url);
+        export_to_env_if_not_present(MEILI_S3_BUCKET_REGION, s3_bucket_region);
+        export_to_env_if_not_present(MEILI_S3_BUCKET_NAME, s3_bucket_name);
+        export_to_env_if_not_present(MEILI_S3_SNAPSHOT_PREFIX, s3_snapshot_prefix);
+        export_to_env_if_not_present(MEILI_S3_ACCESS_KEY, s3_access_key);
+        export_to_env_if_not_present(MEILI_S3_SECRET_KEY, s3_secret_key);
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS,
+            experimental_s3_max_in_flight_parts.to_string(),
+        );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL,
+            experimental_s3_compression_level.to_string(),
+        );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS,
+            experimental_s3_signature_duration_seconds.to_string(),
+        );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE,
+            experimental_s3_multipart_part_size.to_string(),
+        );
+    }
+}
+
+impl TryFrom<S3SnapshotOpts> for S3SnapshotOptions {
+    type Error = anyhow::Error;
+
+    fn try_from(other: S3SnapshotOpts) -> Result<Self, Self::Error> {
+        let S3SnapshotOpts {
+            s3_bucket_url,
+            s3_bucket_region,
+            s3_bucket_name,
+            s3_snapshot_prefix,
+            s3_access_key,
+            s3_secret_key,
+            experimental_s3_max_in_flight_parts,
+            experimental_s3_compression_level,
+            experimental_s3_signature_duration_seconds,
+            experimental_s3_multipart_part_size,
+        } = other;
+
+        Ok(S3SnapshotOptions {
+            s3_bucket_url,
+            s3_bucket_region,
+            s3_bucket_name,
+            s3_snapshot_prefix,
+            s3_access_key,
+            s3_secret_key,
+            s3_max_in_flight_parts: experimental_s3_max_in_flight_parts,
+            s3_compression_level: experimental_s3_compression_level,
+            s3_signature_duration: Duration::from_secs(experimental_s3_signature_duration_seconds),
+            s3_multipart_part_size: experimental_s3_multipart_part_size.as_u64(),
         })
     }
 }
@@ -1089,6 +1289,22 @@ fn default_snapshot_interval_sec() -> &'static str {
     DEFAULT_SNAPSHOT_INTERVAL_SEC_STR
 }
 
+fn default_experimental_s3_snapshot_max_in_flight_parts() -> NonZeroUsize {
+    DEFAULT_S3_SNAPSHOT_MAX_IN_FLIGHT_PARTS
+}
+
+fn default_experimental_s3_snapshot_compression_level() -> u32 {
+    DEFAULT_S3_SNAPSHOT_COMPRESSION_LEVEL
+}
+
+fn default_experimental_s3_snapshot_signature_duration_seconds() -> u64 {
+    DEFAULT_S3_SNAPSHOT_SIGNATURE_DURATION_SECONDS
+}
+
+fn default_experimental_s3_snapshot_multipart_part_size() -> Byte {
+    DEFAULT_S3_SNAPSHOT_MULTIPART_PART_SIZE
+}
+
 fn default_dump_dir() -> PathBuf {
     PathBuf::from(DEFAULT_DUMP_DIR)
 }
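Editor's note: the CLI keeps flag-friendly plain numbers and `Byte` values and only converts to richer types at the boundary, where seconds become a `Duration` and the part size becomes a raw `u64`. A reduced sketch of that conversion step, with field names trimmed down from the diff:

use std::time::Duration;

// Flag-friendly representation: everything is a plain number.
struct S3Flags {
    signature_duration_seconds: u64,
    multipart_part_size_bytes: u64,
}

// Internal representation used by the snapshot upload code.
struct S3Options {
    signature_duration: Duration,
    multipart_part_size: u64,
}

impl From<S3Flags> for S3Options {
    fn from(flags: S3Flags) -> Self {
        S3Options {
            signature_duration: Duration::from_secs(flags.signature_duration_seconds),
            multipart_part_size: flags.multipart_part_size_bytes,
        }
    }
}

fn main() {
    let opts: S3Options = S3Flags {
        signature_duration_seconds: 8 * 3600,         // 8 hours, the default above
        multipart_part_size_bytes: 375 * 1024 * 1024, // 375 MiB, the default above
    }
    .into();
    assert_eq!(opts.signature_duration, Duration::from_secs(28_800));
    assert_eq!(opts.multipart_part_size, 393_216_000);
}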
crates/meilisearch/src/personalization/mod.rs (new file, 366 lines)
@@ -0,0 +1,366 @@
use crate::search::{Personalize, SearchResult};
use meilisearch_types::{
    error::{Code, ErrorCode, ResponseError},
    milli::TimeBudget,
};
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use tracing::{debug, info, warn};

const COHERE_API_URL: &str = "https://api.cohere.ai/v1/rerank";
const MAX_RETRIES: u32 = 10;

#[derive(Debug, thiserror::Error)]
enum PersonalizationError {
    #[error("Personalization service: HTTP request failed: {0}")]
    Request(#[from] reqwest::Error),
    #[error("Personalization service: Failed to parse response: {0}")]
    Parse(String),
    #[error("Personalization service: Cohere API error: {0}")]
    Api(String),
    #[error("Personalization service: Unauthorized: invalid API key")]
    Unauthorized,
    #[error("Personalization service: Rate limited: too many requests")]
    RateLimited,
    #[error("Personalization service: Bad request: {0}")]
    BadRequest(String),
    #[error("Personalization service: Internal server error: {0}")]
    InternalServerError(String),
    #[error("Personalization service: Network error: {0}")]
    Network(String),
    #[error("Personalization service: Deadline exceeded")]
    DeadlineExceeded,
    #[error(transparent)]
    FeatureNotEnabled(#[from] index_scheduler::error::FeatureNotEnabledError),
}

impl ErrorCode for PersonalizationError {
    fn error_code(&self) -> Code {
        match self {
            PersonalizationError::FeatureNotEnabled { .. } => Code::FeatureNotEnabled,
            PersonalizationError::Unauthorized => Code::RemoteInvalidApiKey,
            PersonalizationError::RateLimited => Code::TooManySearchRequests,
            PersonalizationError::BadRequest(_) => Code::RemoteBadRequest,
            PersonalizationError::InternalServerError(_) => Code::RemoteRemoteError,
            PersonalizationError::Network(_) | PersonalizationError::Request(_) => {
                Code::RemoteCouldNotSendRequest
            }
            PersonalizationError::Parse(_) | PersonalizationError::Api(_) => {
                Code::RemoteBadResponse
            }
            PersonalizationError::DeadlineExceeded => Code::Internal, // should not be returned to the client
        }
    }
}

pub struct CohereService {
    client: Client,
    api_key: String,
}

impl CohereService {
    pub fn new(api_key: String) -> Self {
        info!("Personalization service initialized with Cohere API");
        let client = Client::builder()
            .timeout(Duration::from_secs(30))
            .build()
            .expect("Failed to create HTTP client");
        Self { client, api_key }
    }

    pub async fn rerank_search_results(
        &self,
        search_result: SearchResult,
        personalize: &Personalize,
        query: Option<&str>,
        time_budget: TimeBudget,
    ) -> Result<SearchResult, ResponseError> {
        if time_budget.exceeded() {
            warn!("Could not rerank due to deadline");
            // If the deadline is exceeded, return the original search result instead of an error
            return Ok(search_result);
        }

        // Extract user context from personalization
        let user_context = personalize.user_context.as_str();

        // Build the prompt by merging query and user context
        let prompt = match query {
            Some(q) => format!("User Context: {user_context}\nQuery: {q}"),
            None => format!("User Context: {user_context}"),
        };

        // Extract documents for reranking
        let documents: Vec<String> = search_result
            .hits
            .iter()
            .map(|hit| {
                // Convert the document to a string representation for reranking
                serde_json::to_string(&hit.document).unwrap_or_else(|_| "{}".to_string())
            })
            .collect();

        if documents.is_empty() {
            return Ok(search_result);
        }

        // Call Cohere's rerank API with retry logic
        let reranked_indices =
            match self.call_rerank_with_retry(&prompt, &documents, time_budget).await {
                Ok(indices) => indices,
                Err(PersonalizationError::DeadlineExceeded) => {
                    // If the deadline is exceeded, return the original search result instead of an error
                    return Ok(search_result);
                }
                Err(e) => return Err(e.into()),
            };

        debug!("Cohere rerank successful, reordering {} results", search_result.hits.len());

        // Reorder the hits based on Cohere's reranking
        let mut reranked_hits = Vec::new();
        for index in reranked_indices.iter() {
            if let Some(hit) = search_result.hits.get(*index) {
                reranked_hits.push(hit.clone());
            }
        }

        Ok(SearchResult { hits: reranked_hits, ..search_result })
    }

    async fn call_rerank_with_retry(
        &self,
        query: &str,
        documents: &[String],
        time_budget: TimeBudget,
    ) -> Result<Vec<usize>, PersonalizationError> {
        let request_body = CohereRerankRequest {
            query: query.to_string(),
            documents: documents.to_vec(),
            model: "rerank-english-v3.0".to_string(),
        };

        // Retry loop similar to vector extraction
        for attempt in 0..MAX_RETRIES {
            let response_result = self.send_rerank_request(&request_body).await;

            let retry_duration = match self.handle_response(response_result).await {
                Ok(indices) => return Ok(indices),
                Err(retry) => {
                    warn!("Cohere rerank attempt #{} failed: {}", attempt, retry.error);

                    if time_budget.exceeded() {
                        warn!("Could not rerank due to deadline");
                        return Err(PersonalizationError::DeadlineExceeded);
                    } else {
                        match retry.into_duration(attempt) {
                            Ok(d) => d,
                            Err(error) => return Err(error),
                        }
                    }
                }
            };

            // randomly up to double the retry duration
            let retry_duration = retry_duration
                + rand::thread_rng().gen_range(std::time::Duration::ZERO..retry_duration);

            warn!("Retrying after {}ms", retry_duration.as_millis());
            tokio::time::sleep(retry_duration).await;
        }

        // Final attempt without retry
        let response_result = self.send_rerank_request(&request_body).await;

        match self.handle_response(response_result).await {
            Ok(indices) => Ok(indices),
            Err(retry) => Err(retry.into_error()),
        }
    }

    async fn send_rerank_request(
        &self,
        request_body: &CohereRerankRequest,
    ) -> Result<reqwest::Response, reqwest::Error> {
        self.client
            .post(COHERE_API_URL)
            .header("Authorization", format!("Bearer {}", self.api_key))
            .header("Content-Type", "application/json")
            .json(request_body)
            .send()
            .await
    }

    async fn handle_response(
        &self,
        response_result: Result<reqwest::Response, reqwest::Error>,
    ) -> Result<Vec<usize>, Retry> {
        let response = match response_result {
            Ok(r) => r,
            Err(e) if e.is_timeout() => {
                return Err(Retry::retry_later(PersonalizationError::Network(format!(
                    "Request timeout: {}",
                    e
                ))));
            }
            Err(e) => {
                return Err(Retry::retry_later(PersonalizationError::Network(format!(
                    "Network error: {}",
                    e
                ))));
            }
        };

        let status = response.status();
        let status_code = status.as_u16();

        if status.is_success() {
            let rerank_response: CohereRerankResponse = match response.json().await {
                Ok(r) => r,
                Err(e) => {
                    return Err(Retry::retry_later(PersonalizationError::Parse(format!(
                        "Failed to parse response: {}",
                        e
                    ))));
                }
            };

            // Extract indices from rerank results
            let indices: Vec<usize> =
                rerank_response.results.iter().map(|result| result.index as usize).collect();

            return Ok(indices);
        }

        // Handle error status codes
        let error_body = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());

        let retry = match status_code {
            401 => Retry::give_up(PersonalizationError::Unauthorized),
            429 => Retry::rate_limited(PersonalizationError::RateLimited),
            400 => Retry::give_up(PersonalizationError::BadRequest(error_body)),
            500..=599 => Retry::retry_later(PersonalizationError::InternalServerError(format!(
                "Status {}: {}",
                status_code, error_body
            ))),
            402..=499 => Retry::give_up(PersonalizationError::Api(format!(
                "Status {}: {}",
                status_code, error_body
            ))),
            _ => Retry::retry_later(PersonalizationError::Api(format!(
                "Unexpected status {}: {}",
                status_code, error_body
            ))),
        };

        Err(retry)
    }
}

#[derive(Serialize)]
struct CohereRerankRequest {
    query: String,
    documents: Vec<String>,
    model: String,
}

#[derive(Deserialize)]
struct CohereRerankResponse {
    results: Vec<CohereRerankResult>,
}

#[derive(Deserialize)]
struct CohereRerankResult {
    index: u32,
}

// Retry strategy similar to vector extraction
struct Retry {
    error: PersonalizationError,
    strategy: RetryStrategy,
}

enum RetryStrategy {
    GiveUp,
    Retry,
    RetryAfterRateLimit,
}

impl Retry {
    fn give_up(error: PersonalizationError) -> Self {
        Self { error, strategy: RetryStrategy::GiveUp }
    }

    fn retry_later(error: PersonalizationError) -> Self {
        Self { error, strategy: RetryStrategy::Retry }
    }

    fn rate_limited(error: PersonalizationError) -> Self {
        Self { error, strategy: RetryStrategy::RetryAfterRateLimit }
    }

    fn into_duration(self, attempt: u32) -> Result<Duration, PersonalizationError> {
        match self.strategy {
            RetryStrategy::GiveUp => Err(self.error),
            RetryStrategy::Retry => {
                // Exponential backoff: 10^attempt milliseconds
                Ok(Duration::from_millis((10u64).pow(attempt)))
            }
            RetryStrategy::RetryAfterRateLimit => {
                // Longer backoff for rate limits: 100ms + exponential
                Ok(Duration::from_millis(100 + (10u64).pow(attempt)))
            }
        }
    }

    fn into_error(self) -> PersonalizationError {
        self.error
    }
}

pub enum PersonalizationService {
    Cohere(CohereService),
    Disabled,
}

impl PersonalizationService {
    pub fn cohere(api_key: String) -> Self {
        // If the API key is empty, consider the personalization service as disabled
        if api_key.trim().is_empty() {
            Self::disabled()
        } else {
            Self::Cohere(CohereService::new(api_key))
        }
    }

    pub fn disabled() -> Self {
        debug!("Personalization service disabled");
        Self::Disabled
    }

    pub async fn rerank_search_results(
        &self,
        search_result: SearchResult,
        personalize: &Personalize,
        query: Option<&str>,
        time_budget: TimeBudget,
    ) -> Result<SearchResult, ResponseError> {
        match self {
            Self::Cohere(cohere_service) => {
                cohere_service
                    .rerank_search_results(search_result, personalize, query, time_budget)
                    .await
            }
            Self::Disabled => Err(PersonalizationError::FeatureNotEnabled(
                index_scheduler::error::FeatureNotEnabledError {
                    disabled_action: "reranking search results",
                    feature: "personalization",
                    issue_link: "https://github.com/orgs/meilisearch/discussions/866",
                },
            )
            .into()),
        }
    }
}
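Editor's note: the rerank retry loop above backs off exponentially (10^attempt milliseconds, with a larger base after HTTP 429) and then adds random jitter of up to the same duration again. A standalone sketch of that schedule; unlike the code above, the jitter here is drawn over milliseconds rather than over a `Duration` range:

use rand::Rng;
use std::time::Duration;

fn backoff(attempt: u32, rate_limited: bool) -> Duration {
    // Exponential backoff: 10^attempt milliseconds, plus a flat penalty after a 429.
    let penalty_ms: u64 = if rate_limited { 100 } else { 0 };
    let base_ms = penalty_ms + 10u64.pow(attempt);
    // Randomly up to double the retry duration, mirroring the loop above.
    let jitter_ms = rand::thread_rng().gen_range(0..=base_ms);
    Duration::from_millis(base_ms + jitter_ms)
}

fn main() {
    for attempt in 0..4 {
        let wait = backoff(attempt, false);
        // attempt 0 waits 1..=2ms, attempt 3 waits 1000..=2000ms.
        assert!(wait >= Duration::from_millis(10u64.pow(attempt)));
        println!("attempt {attempt}: waiting {}ms", wait.as_millis());
    }
}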
@@ -333,10 +333,12 @@ impl Aggregate for DocumentsDeletionAggregator {
 pub async fn delete_document(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
     path: web::Path<DocumentParam>,
+    params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
     req: HttpRequest,
     opt: web::Data<Opt>,
     analytics: web::Data<Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
+    let CustomMetadataQuery { custom_metadata } = params.into_inner();
     let DocumentParam { index_uid, document_id } = path.into_inner();
     let index_uid = IndexUid::try_from(index_uid)?;
     let network = index_scheduler.network();
@@ -359,7 +361,10 @@ pub async fn delete_document(
     let dry_run = is_dry_run(&req, &opt)?;
     let task = {
         let index_scheduler = index_scheduler.clone();
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
+        tokio::task::spawn_blocking(move || {
+            index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
+        })
+        .await??
     };
 
     if network.sharding && !dry_run {
@@ -678,6 +683,19 @@ pub struct UpdateDocumentsQuery {
     #[param(value_type = char, default = ",", example = ";")]
     #[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidDocumentCsvDelimiter>, error = DeserrQueryParamError<InvalidDocumentCsvDelimiter>)]
     pub csv_delimiter: Option<u8>,
+
+    #[param(example = "custom")]
+    #[deserr(default, error = DeserrQueryParamError<InvalidIndexCustomMetadata>)]
+    pub custom_metadata: Option<String>,
+}
+
+#[derive(Deserialize, Debug, Deserr, IntoParams)]
+#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
+#[into_params(parameter_in = Query, rename_all = "camelCase")]
+pub struct CustomMetadataQuery {
+    #[param(example = "custom")]
+    #[deserr(default, error = DeserrQueryParamError<InvalidIndexCustomMetadata>)]
+    pub custom_metadata: Option<String>,
 }
 
 fn from_char_csv_delimiter(
@@ -819,6 +837,7 @@ pub async fn replace_documents(
 body,
 IndexDocumentsMethod::ReplaceDocuments,
 uid,
+params.custom_metadata,
 dry_run,
 allow_index_creation,
 &req,
@@ -921,6 +940,7 @@ pub async fn update_documents(
 body,
 IndexDocumentsMethod::UpdateDocuments,
 uid,
+params.custom_metadata,
 dry_run,
 allow_index_creation,
 &req,
@@ -940,6 +960,7 @@ async fn document_addition(
 body: Payload,
 method: IndexDocumentsMethod,
 task_id: Option<TaskId>,
+custom_metadata: Option<String>,
 dry_run: bool,
 allow_index_creation: bool,
 req: &HttpRequest,
@@ -1065,8 +1086,10 @@ async fn document_addition(
 };

 let scheduler = index_scheduler.clone();
-let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run))
-.await?
+let task = match tokio::task::spawn_blocking(move || {
+scheduler.register_with_custom_metadata(task, task_id, custom_metadata, dry_run)
+})
+.await?
 {
 Ok(task) => task,
 Err(e) => {
@@ -1130,7 +1153,7 @@ async fn copy_body_to_file(
 /// Delete a set of documents based on an array of document ids.
 #[utoipa::path(
 post,
-path = "{indexUid}/delete-batch",
+path = "{indexUid}/documents/delete-batch",
 tag = "Documents",
 security(("Bearer" = ["documents.delete", "documents.*", "*"])),
 params(
@@ -1161,11 +1184,14 @@ pub async fn delete_documents_batch(
 index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
 index_uid: web::Path<String>,
 body: web::Json<Vec<Value>>,
+params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
 req: HttpRequest,
 opt: web::Data<Opt>,
 analytics: web::Data<Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
 debug!(parameters = ?body, "Delete documents by batch");
+let CustomMetadataQuery { custom_metadata } = params.into_inner();
+
 let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 let network = index_scheduler.network();

@@ -1190,7 +1216,10 @@ pub async fn delete_documents_batch(
 let dry_run = is_dry_run(&req, &opt)?;
 let task = {
 let index_scheduler = index_scheduler.clone();
-tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
+tokio::task::spawn_blocking(move || {
+index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
+})
+.await??
 };

 if network.sharding && !dry_run {
@@ -1244,12 +1273,15 @@ pub struct DocumentDeletionByFilter {
 pub async fn delete_documents_by_filter(
 index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
 index_uid: web::Path<String>,
+params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
 body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
 req: HttpRequest,
 opt: web::Data<Opt>,
 analytics: web::Data<Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
 debug!(parameters = ?body, "Delete documents by filter");
+let CustomMetadataQuery { custom_metadata } = params.into_inner();
+
 let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 let index_uid = index_uid.into_inner();
 let filter = body.into_inner();
@@ -1282,7 +1314,10 @@ pub async fn delete_documents_by_filter(
 let dry_run = is_dry_run(&req, &opt)?;
 let task = {
 let index_scheduler = index_scheduler.clone();
-tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
+tokio::task::spawn_blocking(move || {
+index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
+})
+.await??
 };

 if network.sharding && !dry_run {
@@ -1372,12 +1407,14 @@ impl Aggregate for EditDocumentsByFunctionAggregator {
 pub async fn edit_documents_by_function(
 index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
 index_uid: web::Path<String>,
-params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
+params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
+body: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
 req: HttpRequest,
 opt: web::Data<Opt>,
 analytics: web::Data<Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-debug!(parameters = ?params, "Edit documents by function");
+debug!(parameters = ?body, "Edit documents by function");
+let CustomMetadataQuery { custom_metadata } = params.into_inner();

 index_scheduler
 .features()
@@ -1387,23 +1424,23 @@ pub async fn edit_documents_by_function(

 let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 let index_uid = index_uid.into_inner();
-let params = params.into_inner();
+let body = body.into_inner();

 analytics.publish(
 EditDocumentsByFunctionAggregator {
-filtered: params.filter.is_some(),
-with_context: params.context.is_some(),
+filtered: body.filter.is_some(),
+with_context: body.context.is_some(),
 index_creation: index_scheduler.index(&index_uid).is_err(),
 },
 &req,
 );

 let engine = milli::rhai::Engine::new();
-if let Err(e) = engine.compile(&params.function) {
+if let Err(e) = engine.compile(&body.function) {
 return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
 }

-if let Some(ref filter) = params.filter {
+if let Some(ref filter) = body.filter {
 // we ensure the filter is well formed before enqueuing it
 crate::search::parse_filter(
 filter,
@@ -1414,8 +1451,8 @@ pub async fn edit_documents_by_function(
 }
 let task = KindWithContent::DocumentEdition {
 index_uid: index_uid.clone(),
-filter_expr: params.filter.clone(),
-context: match params.context.clone() {
+filter_expr: body.filter.clone(),
+context: match body.context.clone() {
 Some(Value::Object(m)) => Some(m),
 None => None,
 _ => {
@@ -1425,18 +1462,21 @@ pub async fn edit_documents_by_function(
 ))
 }
 },
-function: params.function.clone(),
+function: body.function.clone(),
 };

 let uid = get_task_id(&req, &opt)?;
 let dry_run = is_dry_run(&req, &opt)?;
 let task = {
 let index_scheduler = index_scheduler.clone();
-tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
+tokio::task::spawn_blocking(move || {
+index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
+})
+.await??
 };

 if network.sharding && !dry_run {
-proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(params), &task).await?;
+proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
 }

 let task: SummarizedTaskView = task.into();
@@ -1477,12 +1517,14 @@ pub async fn edit_documents_by_function(
 pub async fn clear_all_documents(
 index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
 index_uid: web::Path<String>,
+params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
 req: HttpRequest,
 opt: web::Data<Opt>,
 analytics: web::Data<Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
 let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 let network = index_scheduler.network();
+let CustomMetadataQuery { custom_metadata } = params.into_inner();

 analytics.publish(
 DocumentsDeletionAggregator {
@@ -1501,7 +1543,10 @@ pub async fn clear_all_documents(
 let task = {
 let index_scheduler = index_scheduler.clone();

-tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
+tokio::task::spawn_blocking(move || {
+index_scheduler.register_with_custom_metadata(task, uid, custom_metadata, dry_run)
+})
+.await??
 };

 if network.sharding && !dry_run {
(next file; path not shown in this view)
@@ -343,6 +343,7 @@ impl From<FacetSearchQuery> for SearchQuery {
 hybrid,
 ranking_score_threshold,
 locales,
+personalize: None,
 }
 }
 }
(next file; path not shown in this view)
@@ -24,9 +24,9 @@ use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
 use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
 use crate::routes::parse_include_metadata_header;
 use crate::search::{
-add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
-RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult, SemanticRatio,
-DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
+add_search_rules, perform_search, HybridQuery, MatchingStrategy, Personalize,
+RankingScoreThreshold, RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult,
+SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
 DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
 };
 use crate::search_queue::SearchQueue;
@@ -134,6 +134,8 @@ pub struct SearchQueryGet {
 #[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
 #[param(value_type = Vec<Locale>, explode = false)]
 pub locales: Option<CS<Locale>>,
+#[deserr(default, error = DeserrQueryParamError<InvalidSearchPersonalizeUserContext>)]
+pub personalize_user_context: Option<String>,
 }

 #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
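`personalize_user_context` becomes an optional string on the `GET` search query string; with the struct's camelCase renaming it is exposed as `personalizeUserContext` and later wrapped into `Personalize { user_context }`. A minimal sketch of the resulting URL shape (illustrative helper; values would need URL-encoding in a real client):

```rust
// Hypothetical helper: only illustrates the query-string shape implied by `SearchQueryGet`.
fn personalized_search_url(index_uid: &str, q: &str, user_context: &str) -> String {
    format!("/indexes/{index_uid}/search?q={q}&personalizeUserContext={user_context}")
}

// personalized_search_url("movies", "space opera", "likes long epics")
// => "/indexes/movies/search?q=space opera&personalizeUserContext=likes long epics"
```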
@@ -205,6 +207,9 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
 ));
 }

+let personalize =
+other.personalize_user_context.map(|user_context| Personalize { user_context });
+
 Ok(Self {
 q: other.q,
 // `media` not supported for `GET`
@@ -234,6 +239,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
 hybrid,
 ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
 locales: other.locales.map(|o| o.into_iter().collect()),
+personalize,
 })
 }
 }
@@ -322,6 +328,7 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
 pub async fn search_with_url_query(
 index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
 search_queue: web::Data<SearchQueue>,
+personalization_service: web::Data<crate::personalization::PersonalizationService>,
 index_uid: web::Path<String>,
 params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
 req: HttpRequest,
@@ -342,9 +349,16 @@ pub async fn search_with_url_query(

 let index = index_scheduler.index(&index_uid)?;

+// Extract personalization and query string before moving query
+let personalize = query.personalize.take();
+
 let search_kind =
 search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
 let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
+
+// Save the query string for personalization if requested
+let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();

 let permit = search_queue.try_get_search_permit().await?;
 let include_metadata = parse_include_metadata_header(&req);

@@ -365,12 +379,24 @@ pub async fn search_with_url_query(
 .await;
 permit.drop().await;
 let search_result = search_result?;
-if let Ok(ref search_result) = search_result {
+if let Ok((search_result, _)) = search_result.as_ref() {
 aggregate.succeed(search_result);
 }
 analytics.publish(aggregate, &req);

-let search_result = search_result?;
+let (mut search_result, time_budget) = search_result?;
+
+// Apply personalization if requested
+if let Some(personalize) = personalize.as_ref() {
+search_result = personalization_service
+.rerank_search_results(
+search_result,
+personalize,
+personalize_query.as_deref(),
+time_budget,
+)
+.await?;
+}

 debug!(request_uid = ?request_uid, returns = ?search_result, "Search get");
 Ok(HttpResponse::Ok().json(search_result))
@@ -435,6 +461,7 @@ pub async fn search_with_url_query(
 pub async fn search_with_post(
 index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
 search_queue: web::Data<SearchQueue>,
+personalization_service: web::Data<crate::personalization::PersonalizationService>,
 index_uid: web::Path<String>,
 params: AwebJson<SearchQuery, DeserrJsonError>,
 req: HttpRequest,
@@ -455,12 +482,18 @@ pub async fn search_with_post(

 let index = index_scheduler.index(&index_uid)?;
+
+// Extract personalization and query string before moving query
+let personalize = query.personalize.take();

 let search_kind =
 search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
 let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);

 let include_metadata = parse_include_metadata_header(&req);

+// Save the query string for personalization if requested
+let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
+
 let permit = search_queue.try_get_search_permit().await?;
 let search_result = tokio::task::spawn_blocking(move || {
 perform_search(
@@ -479,7 +512,7 @@ pub async fn search_with_post(
 .await;
 permit.drop().await;
 let search_result = search_result?;
-if let Ok(ref search_result) = search_result {
+if let Ok((ref search_result, _)) = search_result {
 aggregate.succeed(search_result);
 if search_result.degraded {
 MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
@@ -487,7 +520,19 @@ pub async fn search_with_post(
 }
 analytics.publish(aggregate, &req);

-let search_result = search_result?;
+let (mut search_result, time_budget) = search_result?;
+
+// Apply personalization if requested
+if let Some(personalize) = personalize.as_ref() {
+search_result = personalization_service
+.rerank_search_results(
+search_result,
+personalize,
+personalize_query.as_deref(),
+time_budget,
+)
+.await?;
+}

 debug!(request_uid = ?request_uid, returns = ?search_result, "Search post");
 Ok(HttpResponse::Ok().json(search_result))
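Both handlers now follow the same shape: run the regular search, then rerank only when a `personalize` object was supplied, feeding the reranker the saved query string and whatever is left of the time budget. A self-contained model of that control flow (the real code calls `PersonalizationService::rerank_search_results` with the crate's own types):

```rust
use std::future::Future;

// Minimal, illustrative model of "search first, rerank only if personalization was requested".
async fn search_then_rerank<R, F, Fut>(base_result: R, user_context: Option<&str>, rerank: F) -> R
where
    F: FnOnce(R, String) -> Fut,
    Fut: Future<Output = R>,
{
    match user_context {
        // Personalization requested: hand the first-pass result to the reranker.
        Some(context) => rerank(base_result, context.to_string()).await,
        // No personalization: the first-pass result is returned untouched.
        None => base_result,
    }
}
```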
(next file; path not shown in this view)
@@ -7,6 +7,7 @@ use serde_json::{json, Value};

 use crate::aggregate_methods;
 use crate::analytics::{Aggregate, AggregateMethod};
+use crate::metrics::MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS;
 use crate::search::{
 SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
 DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
@@ -95,6 +96,9 @@ pub struct SearchAggregator<Method: AggregateMethod> {
 show_ranking_score_details: bool,
 ranking_score_threshold: bool,

+// personalization
+total_personalized: usize,
+
 marker: std::marker::PhantomData<Method>,
 }

@@ -129,6 +133,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
 hybrid,
 ranking_score_threshold,
 locales,
+personalize,
 } = query;

 let mut ret = Self::default();
@@ -204,6 +209,12 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
 ret.locales = locales.iter().copied().collect();
 }

+// personalization
+if personalize.is_some() {
+ret.total_personalized = 1;
+MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
+}
+
 ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
 ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
 ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
@@ -296,6 +307,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
 total_used_negative_operator,
 ranking_score_threshold,
 mut locales,
+total_personalized,
 marker: _,
 } = *new;

@@ -381,6 +393,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
 // locales
 self.locales.append(&mut locales);

+// personalization
+self.total_personalized = self.total_personalized.saturating_add(total_personalized);
+
 self
 }

@@ -426,6 +441,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
 total_used_negative_operator,
 ranking_score_threshold,
 locales,
+total_personalized,
 marker: _,
 } = *self;

@@ -499,6 +515,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
 "show_ranking_score_details": show_ranking_score_details,
 "ranking_score_threshold": ranking_score_threshold,
 },
+"personalization": {
+"total_personalized": total_personalized,
+},
 })
 }
 }
(next file; path not shown in this view)
@@ -41,7 +41,9 @@ use crate::routes::indexes::IndexView;
 use crate::routes::multi_search::SearchResults;
 use crate::routes::network::{Network, Remote};
 use crate::routes::swap_indexes::SwapIndexesPayload;
-use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetadata};
+use crate::routes::webhooks::{
+WebhookResults, WebhookSettings, WebhookWithMetadataRedactedAuthorization,
+};
 use crate::search::{
 FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
 SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult,
@@ -103,7 +105,7 @@ mod webhooks;
 url = "/",
 description = "Local server",
 )),
-components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadata, meilisearch_types::milli::vector::VectorStoreBackend))
+components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadataRedactedAuthorization, meilisearch_types::milli::vector::VectorStoreBackend))
 )]
 pub struct MeilisearchApi;

@@ -216,6 +218,8 @@ pub struct SummarizedTaskView {
 deserialize_with = "time::serde::rfc3339::deserialize"
 )]
 enqueued_at: OffsetDateTime,
+#[serde(default, skip_serializing_if = "Option::is_none")]
+custom_metadata: Option<String>,
 }

 impl From<Task> for SummarizedTaskView {
@@ -226,6 +230,7 @@ impl From<Task> for SummarizedTaskView {
 status: task.status,
 kind: task.kind.as_kind(),
 enqueued_at: task.enqueued_at,
+custom_metadata: task.custom_metadata,
 }
 }
 }
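With `custom_metadata` added to `SummarizedTaskView` (and skipped when absent), task summaries returned by the mutation routes can now carry the caller-supplied string back. A sketch of the JSON shape a client might receive; all values are illustrative:

```rust
use serde_json::json;

fn example_summarized_task() -> serde_json::Value {
    // Field values are made up; `customMetadata` only appears when it was provided.
    json!({
        "taskUid": 0,
        "indexUid": "books",
        "status": "enqueued",
        "type": "documentAdditionOrUpdate",
        "enqueuedAt": "2025-01-01T00:00:00Z",
        "customMetadata": "import-batch-42"
    })
}
```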
(next file; path not shown in this view)
@@ -146,6 +146,7 @@ pub struct SearchResults {
 pub async fn multi_search_with_post(
 index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
 search_queue: Data<SearchQueue>,
+personalization_service: web::Data<crate::personalization::PersonalizationService>,
 params: AwebJson<FederatedSearch, DeserrJsonError>,
 req: HttpRequest,
 analytics: web::Data<Analytics>,
@@ -236,7 +237,7 @@ pub async fn multi_search_with_post(
 // changes.
 let search_results: Result<_, (ResponseError, usize)> = async {
 let mut search_results = Vec::with_capacity(queries.len());
-for (query_index, (index_uid, query, federation_options)) in queries
+for (query_index, (index_uid, mut query, federation_options)) in queries
 .into_iter()
 .map(SearchQueryWithIndex::into_index_query_federation)
 .enumerate()
@@ -269,6 +270,13 @@ pub async fn multi_search_with_post(
 })
 .with_index(query_index)?;

+// Extract personalization and query string before moving query
+let personalize = query.personalize.take();
+
+// Save the query string for personalization if requested
+let personalize_query =
+personalize.is_some().then(|| query.q.clone()).flatten();
+
 let index_uid_str = index_uid.to_string();

 let search_kind = search_kind(
@@ -280,7 +288,7 @@ pub async fn multi_search_with_post(
 .with_index(query_index)?;
 let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);

-let search_result = tokio::task::spawn_blocking(move || {
+let (mut search_result, time_budget) = tokio::task::spawn_blocking(move || {
 perform_search(
 SearchParams {
 index_uid: index_uid_str.clone(),
@@ -295,11 +303,25 @@ pub async fn multi_search_with_post(
 )
 })
 .await
+.with_index(query_index)?
 .with_index(query_index)?;

+// Apply personalization if requested
+if let Some(personalize) = personalize.as_ref() {
+search_result = personalization_service
+.rerank_search_results(
+search_result,
+personalize,
+personalize_query.as_deref(),
+time_budget,
+)
+.await
+.with_index(query_index)?;
+}
+
 search_results.push(SearchResultWithIndex {
 index_uid: index_uid.into_inner(),
-result: search_result.with_index(query_index)?,
+result: search_result,
 });
 }
 Ok(search_results)
(next file; path not shown in this view)
@@ -67,6 +67,7 @@ impl MultiSearchAggregator {
 hybrid: _,
 ranking_score_threshold: _,
 locales: _,
+personalize: _,
 } in &federated_search.queries
 {
 if let Some(federation_options) = federation_options {
(next file; path not shown in this view)
@@ -90,7 +90,7 @@ fn deny_immutable_fields_webhook(
 #[derive(Debug, Serialize, ToSchema)]
 #[serde(rename_all = "camelCase")]
 #[schema(rename_all = "camelCase")]
-pub(super) struct WebhookWithMetadata {
+pub(super) struct WebhookWithMetadataRedactedAuthorization {
 uuid: Uuid,
 is_editable: bool,
 #[schema(value_type = WebhookSettings)]
@@ -98,8 +98,9 @@ pub(super) struct WebhookWithMetadata {
 webhook: Webhook,
 }

-impl WebhookWithMetadata {
-pub fn from(uuid: Uuid, webhook: Webhook) -> Self {
+impl WebhookWithMetadataRedactedAuthorization {
+pub fn from(uuid: Uuid, mut webhook: Webhook) -> Self {
+webhook.redact_authorization_header();
 Self { uuid, is_editable: uuid != Uuid::nil(), webhook }
 }
 }
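Renaming the wrapper to `WebhookWithMetadataRedactedAuthorization` and redacting inside `from` means every route that serializes a webhook shares a single redaction point. A self-contained model of that construction-time redaction; the real `Webhook` type and the exact placeholder string live in meilisearch-types and may differ:

```rust
use std::collections::BTreeMap;

struct WebhookModel {
    headers: BTreeMap<String, String>,
}

impl WebhookModel {
    // Stand-in for `Webhook::redact_authorization_header`.
    fn redact_authorization_header(&mut self) {
        if let Some(value) = self.headers.get_mut("authorization") {
            *value = "<redacted>".to_string();
        }
    }
}

struct WebhookViewModel {
    webhook: WebhookModel,
}

impl WebhookViewModel {
    // Taking the webhook by value and redacting here mirrors the constructor above.
    fn from(mut webhook: WebhookModel) -> Self {
        webhook.redact_authorization_header();
        Self { webhook }
    }
}
```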
@@ -107,7 +108,7 @@ impl WebhookWithMetadata {
 #[derive(Debug, Serialize, ToSchema)]
 #[serde(rename_all = "camelCase")]
 pub(super) struct WebhookResults {
-results: Vec<WebhookWithMetadata>,
+results: Vec<WebhookWithMetadataRedactedAuthorization>,
 }

 #[utoipa::path(
@@ -150,7 +151,7 @@ async fn get_webhooks(
 let results = webhooks
 .webhooks
 .into_iter()
-.map(|(uuid, webhook)| WebhookWithMetadata::from(uuid, webhook))
+.map(|(uuid, webhook)| WebhookWithMetadataRedactedAuthorization::from(uuid, webhook))
 .collect::<Vec<_>>();
 let results = WebhookResults { results };

@@ -301,7 +302,7 @@ fn check_changed(uuid: Uuid, webhook: &Webhook) -> Result<(), WebhooksError> {
 tag = "Webhooks",
 security(("Bearer" = ["webhooks.get", "webhooks.*", "*.get", "*"])),
 responses(
-(status = 200, description = "Webhook found", body = WebhookWithMetadata, content_type = "application/json", example = json!({
+(status = 200, description = "Webhook found", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
 "uuid": "550e8400-e29b-41d4-a716-446655440000",
 "url": "https://your.site/on-tasks-completed",
 "headers": {
@@ -324,7 +325,7 @@ async fn get_webhook(
 let mut webhooks = index_scheduler.webhooks_view();

 let webhook = webhooks.webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?;
-let webhook = WebhookWithMetadata::from(uuid, webhook);
+let webhook = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);

 debug!(returns = ?webhook, "Get webhook");
 Ok(HttpResponse::Ok().json(webhook))
@@ -337,7 +338,7 @@ async fn get_webhook(
 request_body = WebhookSettings,
 security(("Bearer" = ["webhooks.create", "webhooks.*", "*"])),
 responses(
-(status = 201, description = "Webhook created successfully", body = WebhookWithMetadata, content_type = "application/json", example = json!({
+(status = 201, description = "Webhook created successfully", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
 "uuid": "550e8400-e29b-41d4-a716-446655440000",
 "url": "https://your.site/on-tasks-completed",
 "headers": {
@@ -383,7 +384,7 @@ async fn post_webhook(

 analytics.publish(PostWebhooksAnalytics, &req);

-let response = WebhookWithMetadata::from(uuid, webhook);
+let response = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
 debug!(returns = ?response, "Post webhook");
 Ok(HttpResponse::Created().json(response))
 }
@@ -395,7 +396,7 @@ async fn post_webhook(
 request_body = WebhookSettings,
 security(("Bearer" = ["webhooks.update", "webhooks.*", "*"])),
 responses(
-(status = 200, description = "Webhook updated successfully", body = WebhookWithMetadata, content_type = "application/json", example = json!({
+(status = 200, description = "Webhook updated successfully", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
 "uuid": "550e8400-e29b-41d4-a716-446655440000",
 "url": "https://your.site/on-tasks-completed",
 "headers": {
@@ -435,7 +436,7 @@ async fn patch_webhook(

 analytics.publish(PatchWebhooksAnalytics, &req);

-let response = WebhookWithMetadata::from(uuid, webhook);
+let response = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
 debug!(returns = ?response, "Patch webhook");
 Ok(HttpResponse::Ok().json(response))
 }
(next file; path not shown in this view)
@@ -601,6 +601,10 @@ impl PartitionedQueries {
 .into());
 }

+if federated_query.has_personalize() {
+return Err(MeilisearchHttpError::PersonalizationInFederatedQuery(query_index).into());
+}
+
 let (index_uid, query, federation_options) = federated_query.into_index_query_federation();

 let federation_options = federation_options.unwrap_or_default();
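Federated queries refuse per-query personalization: any query carrying a `personalize` object is rejected with `PersonalizationInFederatedQuery` and its position in the `queries` array. A standalone sketch of the position-reporting part of that guard:

```rust
// Illustrative only: returns the index of the first query that requested personalization,
// which the route turns into `PersonalizationInFederatedQuery(query_index)`.
fn first_personalized_query(has_personalize: &[bool]) -> Option<usize> {
    has_personalize.iter().position(|flag| *flag)
}

#[test]
fn reports_the_offending_query_index() {
    assert_eq!(first_personalized_query(&[false, true, false]), Some(1));
    assert_eq!(first_personalized_query(&[false, false]), None);
}
```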
(next file; path not shown in this view)
@@ -59,6 +59,13 @@ pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
 pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
 pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";

+#[derive(Clone, Default, PartialEq, Deserr, ToSchema, Debug)]
+#[deserr(error = DeserrJsonError<InvalidSearchPersonalize>, rename_all = camelCase, deny_unknown_fields)]
+pub struct Personalize {
+#[deserr(error = DeserrJsonError<InvalidSearchPersonalizeUserContext>)]
+pub user_context: String,
+}
+
 #[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct SearchQuery {
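`Personalize` is deserialized with camelCase field names and `deny_unknown_fields`, so a personalized request body nests a required `userContext` string under `personalize`. A sketch of such a body built with `serde_json`; query and context values are illustrative:

```rust
use serde_json::json;

fn example_personalized_search_body() -> serde_json::Value {
    json!({
        "q": "space opera",
        "limit": 10,
        "personalize": {
            // Required whenever `personalize` is present; unknown keys are rejected.
            "userContext": "prefers long epics, dislikes horror"
        }
    })
}
```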
@@ -122,6 +129,8 @@ pub struct SearchQuery {
 pub ranking_score_threshold: Option<RankingScoreThreshold>,
 #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>)]
 pub locales: Option<Vec<Locale>>,
+#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
+pub personalize: Option<Personalize>,
 }

 impl From<SearchParameters> for SearchQuery {
@@ -169,6 +178,7 @@ impl From<SearchParameters> for SearchQuery {
 highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
 crop_marker: DEFAULT_CROP_MARKER(),
 locales: None,
+personalize: None,
 }
 }
 }
@@ -250,6 +260,7 @@ impl fmt::Debug for SearchQuery {
 attributes_to_search_on,
 ranking_score_threshold,
 locales,
+personalize,
 } = self;

 let mut debug = f.debug_struct("SearchQuery");
@@ -338,6 +349,10 @@ impl fmt::Debug for SearchQuery {
 debug.field("locales", &locales);
 }

+if let Some(personalize) = personalize {
+debug.field("personalize", &personalize);
+}
+
 debug.finish()
 }
 }
@@ -543,6 +558,9 @@ pub struct SearchQueryWithIndex {
 pub ranking_score_threshold: Option<RankingScoreThreshold>,
 #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
 pub locales: Option<Vec<Locale>>,
+#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
+#[serde(skip)]
+pub personalize: Option<Personalize>,

 #[deserr(default)]
 pub federation_options: Option<FederationOptions>,
@@ -567,6 +585,10 @@ impl SearchQueryWithIndex {
 self.facets.as_deref().filter(|v| !v.is_empty())
 }

+pub fn has_personalize(&self) -> bool {
+self.personalize.is_some()
+}
+
 pub fn from_index_query_federation(
 index_uid: IndexUid,
 query: SearchQuery,
@@ -600,6 +622,7 @@ impl SearchQueryWithIndex {
 attributes_to_search_on,
 ranking_score_threshold,
 locales,
+personalize,
 } = query;

 SearchQueryWithIndex {
@@ -631,6 +654,7 @@ impl SearchQueryWithIndex {
 attributes_to_search_on,
 ranking_score_threshold,
 locales,
+personalize,
 federation_options,
 }
 }
@@ -666,6 +690,7 @@ impl SearchQueryWithIndex {
 hybrid,
 ranking_score_threshold,
 locales,
+personalize,
 } = self;
 (
 index_uid,
@@ -697,6 +722,7 @@ impl SearchQueryWithIndex {
 hybrid,
 ranking_score_threshold,
 locales,
+personalize,
 // do not use ..Default::default() here,
 // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
 },
@@ -1149,7 +1175,10 @@ pub struct SearchParams {
 pub include_metadata: bool,
 }

-pub fn perform_search(params: SearchParams, index: &Index) -> Result<SearchResult, ResponseError> {
+pub fn perform_search(
+params: SearchParams,
+index: &Index,
+) -> Result<(SearchResult, TimeBudget), ResponseError> {
 let SearchParams {
 index_uid,
 query,
@@ -1168,7 +1197,7 @@ pub fn perform_search(params: SearchParams, index: &Index) -> Result<SearchResul
 };

 let (search, is_finite_pagination, max_total_hits, offset) =
-prepare_search(index, &rtxn, &query, &search_kind, time_budget, features)?;
+prepare_search(index, &rtxn, &query, &search_kind, time_budget.clone(), features)?;

 let (
 milli::SearchResult {
@@ -1226,6 +1255,7 @@ pub fn perform_search(params: SearchParams, index: &Index) -> Result<SearchResul
 attributes_to_search_on: _,
 filter: _,
 distinct: _,
+personalize: _,
 } = query;

 let format = AttributesFormat {
@@ -1291,7 +1321,7 @@ pub fn perform_search(params: SearchParams, index: &Index) -> Result<SearchResul
 request_uid: Some(request_uid),
 metadata,
 };
-Ok(result)
+Ok((result, time_budget))
 }

 #[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
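`perform_search` now hands back the `TimeBudget` together with the `SearchResult`, so the caller can give the reranking step only the time that the first pass did not consume. A self-contained model of why the leftover budget matters; the real `TimeBudget` type lives in milli and differs in detail:

```rust
use std::time::{Duration, Instant};

// Illustrative stand-in for the crate's TimeBudget.
struct TimeBudgetModel {
    deadline: Instant,
}

impl TimeBudgetModel {
    fn new(total: Duration) -> Self {
        Self { deadline: Instant::now() + total }
    }

    // Whatever is left after the first-pass search is what reranking may spend.
    fn remaining(&self) -> Duration {
        self.deadline.saturating_duration_since(Instant::now())
    }
}

fn example() {
    let budget = TimeBudgetModel::new(Duration::from_millis(1500));
    // ... first-pass search would run here ...
    let _left_for_reranking = budget.remaining();
}
```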
(next file; path not shown in this view)
@@ -91,7 +91,16 @@ impl<'a> Index<'a, Owned> {
 documents: Value,
 primary_key: Option<&str>,
 ) -> (Value, StatusCode) {
-self._add_documents(documents, primary_key).await
+self._add_documents(documents, primary_key, None).await
+}
+
+pub async fn add_documents_with_custom_metadata(
+&self,
+documents: Value,
+primary_key: Option<&str>,
+custom_metadata: Option<&str>,
+) -> (Value, StatusCode) {
+self._add_documents(documents, primary_key, custom_metadata).await
 }

 pub async fn raw_add_documents(
@@ -352,12 +361,25 @@ impl<State> Index<'_, State> {
 &self,
 documents: Value,
 primary_key: Option<&str>,
+custom_metadata: Option<&str>,
 ) -> (Value, StatusCode) {
-let url = match primary_key {
+let url = match (primary_key, custom_metadata) {
-Some(key) => {
+(Some(key), Some(meta)) => {
-format!("/indexes/{}/documents?primaryKey={}", urlencode(self.uid.as_ref()), key)
+format!(
+"/indexes/{}/documents?primaryKey={key}&customMetadata={meta}",
+urlencode(self.uid.as_ref()),
+)
 }
-None => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())),
+(None, Some(meta)) => {
+format!(
+"/indexes/{}/documents?&customMetadata={meta}",
+urlencode(self.uid.as_ref()),
+)
+}
+(Some(key), None) => {
+format!("/indexes/{}/documents?&primaryKey={key}", urlencode(self.uid.as_ref()),)
+}
+(None, None) => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())),
 };
 self.service.post_encoded(url, documents, self.encoder).await
 }
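The private `_add_documents` helper now derives the URL from the `(primary_key, custom_metadata)` pair. A standalone mirror of that match, kept close to the branches above (including the redundant `?&` separators); the real helper also URL-encodes the index uid:

```rust
fn documents_url(index_uid: &str, primary_key: Option<&str>, custom_metadata: Option<&str>) -> String {
    match (primary_key, custom_metadata) {
        (Some(key), Some(meta)) => {
            format!("/indexes/{index_uid}/documents?primaryKey={key}&customMetadata={meta}")
        }
        (None, Some(meta)) => format!("/indexes/{index_uid}/documents?&customMetadata={meta}"),
        (Some(key), None) => format!("/indexes/{index_uid}/documents?&primaryKey={key}"),
        (None, None) => format!("/indexes/{index_uid}/documents"),
    }
}
```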
(next file; path not shown in this view)
@@ -241,7 +241,7 @@ pub async fn shared_index_with_documents() -> &'static Index<'static, Shared> {
 let server = Server::new_shared();
 let index = server._index("SHARED_DOCUMENTS").to_shared();
 let documents = DOCUMENTS.clone();
-let (response, _code) = index._add_documents(documents, None).await;
+let (response, _code) = index._add_documents(documents, None, None).await;
 server.wait_task(response.uid()).await.succeeded();
 let (response, _code) = index
 ._update_settings(
@@ -284,7 +284,7 @@ pub async fn shared_index_with_score_documents() -> &'static Index<'static, Shar
 let server = Server::new_shared();
 let index = server._index("SHARED_SCORE_DOCUMENTS").to_shared();
 let documents = SCORE_DOCUMENTS.clone();
-let (response, _code) = index._add_documents(documents, None).await;
+let (response, _code) = index._add_documents(documents, None, None).await;
 server.wait_task(response.uid()).await.succeeded();
 let (response, _code) = index
 ._update_settings(
@@ -361,7 +361,7 @@ pub async fn shared_index_with_nested_documents() -> &'static Index<'static, Sha
 let server = Server::new_shared();
 let index = server._index("SHARED_NESTED_DOCUMENTS").to_shared();
 let documents = NESTED_DOCUMENTS.clone();
-let (response, _code) = index._add_documents(documents, None).await;
+let (response, _code) = index._add_documents(documents, None, None).await;
 server.wait_task(response.uid()).await.succeeded();
 let (response, _code) = index
 ._update_settings(
@@ -508,7 +508,7 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
 .get_or_init(|| async {
 let server = Server::new_shared();
 let index = server._index("SHARED_GEO_DOCUMENTS").to_shared();
-let (response, _code) = index._add_documents(GEO_DOCUMENTS.clone(), None).await;
+let (response, _code) = index._add_documents(GEO_DOCUMENTS.clone(), None, None).await;
 server.wait_task(response.uid()).await.succeeded();

 let (response, _code) = index
@@ -531,7 +531,7 @@ pub async fn shared_index_geojson_documents() -> &'static Index<'static, Shared>
 let index = server._index("SHARED_GEOJSON_DOCUMENTS").to_shared();
 let countries = include_str!("../documents/geojson/assets/countries.json");
 let lille = serde_json::from_str::<serde_json::Value>(countries).unwrap();
-let (response, _code) = index._add_documents(Value(lille), Some("name")).await;
+let (response, _code) = index._add_documents(Value(lille), Some("name"), None).await;
 server.wait_task(response.uid()).await.succeeded();

 let (response, _code) =
(next file; path not shown in this view)
@@ -49,8 +49,8 @@ impl Server<Owned> {
 }

 let options = default_settings(dir.path());
+let handle = tokio::runtime::Handle::current();
-let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
+let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
 let service = Service { index_scheduler, auth, options, api_key: None };

 Server { service, _dir: Some(dir), _marker: PhantomData }
@@ -65,7 +65,9 @@ impl Server<Owned> {

 options.master_key = Some("MASTER_KEY".to_string());

-let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
+let handle = tokio::runtime::Handle::current();
+
+let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
 let service = Service { index_scheduler, auth, options, api_key: None };

 Server { service, _dir: Some(dir), _marker: PhantomData }
@@ -78,7 +80,9 @@ impl Server<Owned> {
 }

 pub async fn new_with_options(options: Opt) -> Result<Self, anyhow::Error> {
-let (index_scheduler, auth) = setup_meilisearch(&options)?;
+let handle = tokio::runtime::Handle::current();
+
+let (index_scheduler, auth) = setup_meilisearch(&options, handle)?;
 let service = Service { index_scheduler, auth, options, api_key: None };

 Ok(Server { service, _dir: None, _marker: PhantomData })
@@ -217,8 +221,9 @@ impl Server<Shared> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let options = default_settings(dir.path());
|
let options = default_settings(dir.path());
|
||||||
|
let handle = tokio::runtime::Handle::current();
|
||||||
|
|
||||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
|
||||||
let service = Service { index_scheduler, auth, api_key: None, options };
|
let service = Service { index_scheduler, auth, api_key: None, options };
|
||||||
|
|
||||||
Server { service, _dir: Some(dir), _marker: PhantomData }
|
Server { service, _dir: Some(dir), _marker: PhantomData }
|
||||||
|
|||||||
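As an aside, a minimal sketch of the call shape introduced above, for a synchronous caller: `tokio::runtime::Handle::current()` panics outside a runtime, so such a caller would have to enter one first (whether Meilisearch's own `main` does exactly this is not shown in this compare view):

// Sketch only; `options` stands for an already-built `Opt`.
let rt = tokio::runtime::Runtime::new()?;
let _guard = rt.enter(); // makes Handle::current() valid on this thread
let handle = tokio::runtime::Handle::current(); // or rt.handle().clone()
let (index_scheduler, auth) = setup_meilisearch(&options, handle)?;
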
@@ -10,8 +10,9 @@ use actix_web::test::TestRequest;
use actix_web::web::Data;
use index_scheduler::IndexScheduler;
use meilisearch::analytics::Analytics;
+use meilisearch::personalization::PersonalizationService;
use meilisearch::search_queue::SearchQueue;
-use meilisearch::{create_app, Opt, SubscriberForSecondLayer};
+use meilisearch::{create_app, Opt, ServicesData, SubscriberForSecondLayer};
use meilisearch_auth::AuthController;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;

@@ -135,14 +136,24 @@ impl Service {
self.options.experimental_search_queue_size,
NonZeroUsize::new(1).unwrap(),
);
+let personalization_service = self
+.options
+.experimental_personalization_api_key
+.clone()
+.map(PersonalizationService::cohere)
+.unwrap_or_else(PersonalizationService::disabled);

actix_web::test::init_service(create_app(
-self.index_scheduler.clone().into(),
-self.auth.clone().into(),
-Data::new(search_queue),
+ServicesData {
+index_scheduler: self.index_scheduler.clone().into(),
+auth: self.auth.clone().into(),
+search_queue: Data::new(search_queue),
+personalization_service: Data::new(personalization_service),
+logs_route_handle: Data::new(route_layer_handle),
+logs_stderr_handle: Data::new(stderr_layer_handle),
+analytics: Data::new(Analytics::no_analytics()),
+},
self.options.clone(),
-(route_layer_handle, stderr_layer_handle),
-Data::new(Analytics::no_analytics()),
true,
))
.await

@@ -207,3 +207,118 @@ async fn errors() {
}
"###);
}

+#[actix_rt::test]
+async fn search_with_personalization_without_enabling_the_feature() {
+let server = Server::new().await;
+let index = server.unique_index();
+
+// Create the index and add some documents
+let (task, _code) = index.create(None).await;
+server.wait_task(task.uid()).await.succeeded();
+
+let (task, _code) = index
+.add_documents(
+json!([
+{"id": 1, "title": "The Dark Knight", "genre": "Action"},
+{"id": 2, "title": "Inception", "genre": "Sci-Fi"},
+{"id": 3, "title": "The Matrix", "genre": "Sci-Fi"}
+]),
+None,
+)
+.await;
+server.wait_task(task.uid()).await.succeeded();
+
+// Try to search with personalization - should return feature_not_enabled error
+let (response, code) = index
+.search_post(json!({
+"q": "movie",
+"personalize": {
+"userContext": "I love science fiction movies"
+}
+}))
+.await;
+
+meili_snap::snapshot!(code, @"400 Bad Request");
+meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+{
+"message": "reranking search results requires enabling the `personalization` experimental feature. See https://github.com/orgs/meilisearch/discussions/866",
+"code": "feature_not_enabled",
+"type": "invalid_request",
+"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
+}
+"###);
+}
+
+#[actix_rt::test]
+async fn multi_search_with_personalization_without_enabling_the_feature() {
+let server = Server::new().await;
+let index = server.unique_index();
+
+// Create the index and add some documents
+let (task, _code) = index.create(None).await;
+server.wait_task(task.uid()).await.succeeded();
+
+let (task, _code) = index
+.add_documents(
+json!([
+{"id": 1, "title": "The Dark Knight", "genre": "Action"},
+{"id": 2, "title": "Inception", "genre": "Sci-Fi"},
+{"id": 3, "title": "The Matrix", "genre": "Sci-Fi"}
+]),
+None,
+)
+.await;
+server.wait_task(task.uid()).await.succeeded();
+
+// Try to multi-search with personalization - should return feature_not_enabled error
+let (response, code) = server
+.multi_search(json!({
+"queries": [
+{
+"indexUid": index.uid,
+"q": "movie",
+"personalize": {
+"userContext": "I love science fiction movies"
+}
+}
+]
+}))
+.await;
+
+meili_snap::snapshot!(code, @"400 Bad Request");
+meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+{
+"message": "Inside `.queries[0]`: reranking search results requires enabling the `personalization` experimental feature. See https://github.com/orgs/meilisearch/discussions/866",
+"code": "feature_not_enabled",
+"type": "invalid_request",
+"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
+}
+"###);
+
+// Try to federated search with personalization - should return feature_not_enabled error
+let (response, code) = server
+.multi_search(json!({
+"federation": {},
+"queries": [
+{
+"indexUid": index.uid,
+"q": "movie",
+"personalize": {
+"userContext": "I love science fiction movies"
+}
+}
+]
+}))
+.await;
+
+meili_snap::snapshot!(code, @"400 Bad Request");
+meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+{
+"message": "Inside `.queries[0]`: Using `.personalize` is not allowed in federated queries.\n - Hint: remove `personalize` from query #0 or remove `federation` from the request",
+"code": "invalid_multi_search_query_personalization",
+"type": "invalid_request",
+"link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_personalization"
+}
+"###);
+}

@@ -8,8 +8,9 @@ use actix_web::http::header::ContentType;
use actix_web::web::Data;
use meili_snap::snapshot;
use meilisearch::analytics::Analytics;
+use meilisearch::personalization::PersonalizationService;
use meilisearch::search_queue::SearchQueue;
-use meilisearch::{create_app, Opt, SubscriberForSecondLayer};
+use meilisearch::{create_app, Opt, ServicesData, SubscriberForSecondLayer};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;

@@ -50,12 +51,16 @@ async fn basic_test_log_stream_route() {
);

let app = actix_web::test::init_service(create_app(
-server.service.index_scheduler.clone().into(),
-server.service.auth.clone().into(),
-Data::new(search_queue),
+ServicesData {
+index_scheduler: server.service.index_scheduler.clone().into(),
+auth: server.service.auth.clone().into(),
+search_queue: Data::new(search_queue),
+personalization_service: Data::new(PersonalizationService::disabled()),
+logs_route_handle: Data::new(route_layer_handle),
+logs_stderr_handle: Data::new(stderr_layer_handle),
+analytics: Data::new(Analytics::no_analytics()),
+},
server.service.options.clone(),
-(route_layer_handle, stderr_layer_handle),
-Data::new(Analytics::no_analytics()),
true,
))
.await;

@@ -1,7 +1,11 @@
use meili_snap::*;
+use meilisearch::Opt;
+use tempfile::TempDir;
+
use super::test_settings_documents_indexing_swapping_and_search;
-use crate::common::{shared_does_not_exists_index, Server, DOCUMENTS, NESTED_DOCUMENTS};
+use crate::common::{
+default_settings, shared_does_not_exists_index, Server, DOCUMENTS, NESTED_DOCUMENTS,
+};
use crate::json;

#[actix_rt::test]

|||||||
}
|
}
|
||||||
"#);
|
"#);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
#[ignore]
|
||||||
|
async fn search_with_personalization_invalid_api_key() {
|
||||||
|
// Create a server with a fake personalization API key
|
||||||
|
let dir = TempDir::new().unwrap();
|
||||||
|
let options = Opt {
|
||||||
|
experimental_personalization_api_key: Some("fake-api-key-12345".to_string()),
|
||||||
|
..default_settings(dir.path())
|
||||||
|
};
|
||||||
|
let server = Server::new_with_options(options).await.unwrap();
|
||||||
|
let index = server.unique_index();
|
||||||
|
|
||||||
|
// Create the index and add some documents
|
||||||
|
let (task, _code) = index.create(None).await;
|
||||||
|
server.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let (task, _code) = index
|
||||||
|
.add_documents(
|
||||||
|
json!([
|
||||||
|
{"id": 1, "title": "The Dark Knight", "genre": "Action"},
|
||||||
|
{"id": 2, "title": "Inception", "genre": "Sci-Fi"},
|
||||||
|
{"id": 3, "title": "The Matrix", "genre": "Sci-Fi"}
|
||||||
|
]),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
server.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// Try to search with personalization - should return remote_invalid_api_key error
|
||||||
|
let (response, code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"q": "the",
|
||||||
|
"personalize": {
|
||||||
|
"userContext": "I love science fiction movies"
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
|
||||||
|
snapshot!(code, @"403 Forbidden");
|
||||||
|
snapshot!(json_string!(response), @r#"
|
||||||
|
{
|
||||||
|
"message": "Personalization service: Unauthorized: invalid API key",
|
||||||
|
"code": "remote_invalid_api_key",
|
||||||
|
"type": "auth",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#remote_invalid_api_key"
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn search_with_personalization_no_user_context() {
|
||||||
|
// Create a server with a fake personalization API key
|
||||||
|
let dir = TempDir::new().unwrap();
|
||||||
|
let options = Opt {
|
||||||
|
experimental_personalization_api_key: Some("fake-api-key-12345".to_string()),
|
||||||
|
..default_settings(dir.path())
|
||||||
|
};
|
||||||
|
let server = Server::new_with_options(options).await.unwrap();
|
||||||
|
let index = server.unique_index();
|
||||||
|
|
||||||
|
// Create the index and add some documents
|
||||||
|
let (task, _code) = index.create(None).await;
|
||||||
|
server.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let (task, _code) = index
|
||||||
|
.add_documents(
|
||||||
|
json!([
|
||||||
|
{"id": 1, "title": "The Dark Knight", "genre": "Action"},
|
||||||
|
{"id": 2, "title": "Inception", "genre": "Sci-Fi"},
|
||||||
|
{"id": 3, "title": "The Matrix", "genre": "Sci-Fi"}
|
||||||
|
]),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
server.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// Try to search with personalization - should return remote_invalid_api_key error
|
||||||
|
let (response, code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"q": "the",
|
||||||
|
"personalize": {}
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Missing field `userContext` inside `.personalize`",
|
||||||
|
"code": "invalid_search_personalize",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_search_personalize"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|||||||
@@ -137,6 +137,60 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
}])
});

+static MANY_DOCS: Lazy<Value> = Lazy::new(|| {
+json!([
+{
+"title": "Shazam!",
+"desc": "a Captain Marvel ersatz",
+"id": "1",
+},
+{
+"title": "Captain Planet",
+"desc": "He's not part of the Marvel Cinematic Universe",
+"id": "2",
+},
+{
+"title": "Captain Marvel",
+"desc": "a Shazam ersatz",
+"id": "3",
+},
+{
+"title": "Captain Marvel",
+"desc": "a Shazam ersatz",
+"id": "4",
+},
+{
+"title": "Captain Marvel",
+"desc": "a Shazam ersatz",
+"id": "5",
+},
+{
+"title": "Captain Marvel",
+"desc": "a Shazam ersatz",
+"id": "6",
+},
+{
+"title": "Captain Marvel",
+"desc": "a Shazam ersatz",
+"id": "7",
+},
+{
+"title": "Captain Marvel",
+"desc": "a Shazam ersatz",
+"id": "8",
+},
+{
+"title": "Captain Marvel",
+"desc": "a Shazam ersatz",
+"id": "9",
+},
+{
+"title": "Captain Marvel",
+"desc": "a Shazam ersatz",
+"id": "10",
+}])
+});
+
#[actix_rt::test]
async fn simple_search() {
let server = Server::new_shared();

@@ -449,6 +503,38 @@ async fn simple_search_hf() {
snapshot!(response["semanticHitCount"], @"3");
}

+#[actix_rt::test]
+async fn issue_5976_missing_docs_hf() {
+let server = Server::new_shared();
+let index = index_with_documents_hf(server, &MANY_DOCS).await;
+let (response, code) = index
+.search_post(
+json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true}),
+)
+.await;
+snapshot!(code, @"200 OK");
+let are_empty: Vec<_> = response["hits"]
+.as_array()
+.unwrap()
+.iter()
+.map(|hit| hit["_vectors"]["default"]["embeddings"].as_array().unwrap().is_empty())
+.collect();
+snapshot!(json!(are_empty), @r###"
+[
+false,
+false,
+false,
+false,
+false,
+false,
+false,
+false,
+false,
+false
+]
+"###);
+}
+
#[actix_rt::test]
async fn distribution_shift() {
let server = Server::new_shared();

@@ -3141,3 +3141,513 @@ fn fail(override_response_body: Option<&str>) -> ResponseTemplate {
response.set_body_json(json!({"error": "provoked error", "code": "test_error", "link": "https://docs.meilisearch.com/errors#test_error"}))
}
}

+#[actix_rt::test]
+async fn remote_auto_sharding() {
+let ms0 = Server::new().await;
+let ms1 = Server::new().await;
+let ms2 = Server::new().await;
+
+// enable feature
+
+let (response, code) = ms0.set_features(json!({"network": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response["network"]), @"true");
+let (response, code) = ms1.set_features(json!({"network": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response["network"]), @"true");
+let (response, code) = ms2.set_features(json!({"network": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response["network"]), @"true");
+
+// set self & sharding
+
+let (response, code) = ms0.set_network(json!({"self": "ms0", "sharding": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response), @r###"
+{
+"self": "ms0",
+"remotes": {},
+"sharding": true
+}
+"###);
+let (response, code) = ms1.set_network(json!({"self": "ms1", "sharding": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response), @r###"
+{
+"self": "ms1",
+"remotes": {},
+"sharding": true
+}
+"###);
+let (response, code) = ms2.set_network(json!({"self": "ms2", "sharding": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response), @r###"
+{
+"self": "ms2",
+"remotes": {},
+"sharding": true
+}
+"###);
+
+// wrap servers
+let ms0 = Arc::new(ms0);
+let ms1 = Arc::new(ms1);
+let ms2 = Arc::new(ms2);
+
+let rms0 = LocalMeili::new(ms0.clone()).await;
+let rms1 = LocalMeili::new(ms1.clone()).await;
+let rms2 = LocalMeili::new(ms2.clone()).await;
+
+// set network
+let network = json!({"remotes": {
+"ms0": {
+"url": rms0.url()
+},
+"ms1": {
+"url": rms1.url()
+},
+"ms2": {
+"url": rms2.url()
+}
+}});
+
+println!("{}", serde_json::to_string_pretty(&network).unwrap());
+
+let (_response, status_code) = ms0.set_network(network.clone()).await;
+snapshot!(status_code, @"200 OK");
+let (_response, status_code) = ms1.set_network(network.clone()).await;
+snapshot!(status_code, @"200 OK");
+let (_response, status_code) = ms2.set_network(network.clone()).await;
+snapshot!(status_code, @"200 OK");
+
+// add documents
+let documents = SCORE_DOCUMENTS.clone();
+let documents = documents.as_array().unwrap();
+let index0 = ms0.index("test");
+let _index1 = ms1.index("test");
+let _index2 = ms2.index("test");
+
+let (task, _status_code) = index0.add_documents(json!(documents), None).await;
+
+let t0 = task.uid();
+let (t, _) = ms0.get_task(task.uid()).await;
+let t1 = t["network"]["remote_tasks"]["ms1"]["taskUid"].as_u64().unwrap();
+let t2 = t["network"]["remote_tasks"]["ms2"]["taskUid"].as_u64().unwrap();
+
+ms0.wait_task(t0).await.succeeded();
+ms1.wait_task(t1).await.succeeded();
+ms2.wait_task(t2).await.succeeded();
+
+// perform multi-search
+let query = "badman returns";
+let request = json!({
+"federation": {},
+"queries": [
+{
+"q": query,
+"indexUid": "test",
+"federationOptions": {
+"remote": "ms0"
+}
+},
+{
+"q": query,
+"indexUid": "test",
+"federationOptions": {
+"remote": "ms1"
+}
+},
+{
+"q": query,
+"indexUid": "test",
+"federationOptions": {
+"remote": "ms2"
+}
+},
+]
+});
+
+let (response, _status_code) = ms0.multi_search(request.clone()).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response, { ".processingTimeMs" => "[time]", ".requestUid" => "[uuid]" }), @r###"
+{
+"hits": [
+{
+"title": "Batman Returns",
+"id": "C",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 2,
+"weightedRankingScore": 0.8317901234567902,
+"remote": "ms2"
+}
+},
+{
+"title": "Batman the dark knight returns: Part 1",
+"id": "A",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 1,
+"weightedRankingScore": 0.7028218694885362,
+"remote": "ms1"
+}
+},
+{
+"title": "Batman the dark knight returns: Part 2",
+"id": "B",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 1,
+"weightedRankingScore": 0.7028218694885362,
+"remote": "ms1"
+}
+},
+{
+"title": "Badman",
+"id": "E",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 2,
+"weightedRankingScore": 0.5,
+"remote": "ms2"
+}
+},
+{
+"title": "Batman",
+"id": "D",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 0,
+"weightedRankingScore": 0.23106060606060605,
+"remote": "ms0"
+}
+}
+],
+"processingTimeMs": "[time]",
+"limit": 20,
+"offset": 0,
+"estimatedTotalHits": 5,
+"requestUid": "[uuid]",
+"remoteErrors": {}
+}
+"###);
+let (response, _status_code) = ms1.multi_search(request.clone()).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response, { ".processingTimeMs" => "[time]", ".requestUid" => "[uuid]" }), @r###"
+{
+"hits": [
+{
+"title": "Batman Returns",
+"id": "C",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 2,
+"weightedRankingScore": 0.8317901234567902,
+"remote": "ms2"
+}
+},
+{
+"title": "Batman the dark knight returns: Part 1",
+"id": "A",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 1,
+"weightedRankingScore": 0.7028218694885362,
+"remote": "ms1"
+}
+},
+{
+"title": "Batman the dark knight returns: Part 2",
+"id": "B",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 1,
+"weightedRankingScore": 0.7028218694885362,
+"remote": "ms1"
+}
+},
+{
+"title": "Badman",
+"id": "E",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 2,
+"weightedRankingScore": 0.5,
+"remote": "ms2"
+}
+},
+{
+"title": "Batman",
+"id": "D",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 0,
+"weightedRankingScore": 0.23106060606060605,
+"remote": "ms0"
+}
+}
+],
+"processingTimeMs": "[time]",
+"limit": 20,
+"offset": 0,
+"estimatedTotalHits": 5,
+"requestUid": "[uuid]",
+"remoteErrors": {}
+}
+"###);
+let (response, _status_code) = ms2.multi_search(request.clone()).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response, { ".processingTimeMs" => "[time]", ".requestUid" => "[uuid]" }), @r###"
+{
+"hits": [
+{
+"title": "Batman Returns",
+"id": "C",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 2,
+"weightedRankingScore": 0.8317901234567902,
+"remote": "ms2"
+}
+},
+{
+"title": "Batman the dark knight returns: Part 1",
+"id": "A",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 1,
+"weightedRankingScore": 0.7028218694885362,
+"remote": "ms1"
+}
+},
+{
+"title": "Batman the dark knight returns: Part 2",
+"id": "B",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 1,
+"weightedRankingScore": 0.7028218694885362,
+"remote": "ms1"
+}
+},
+{
+"title": "Badman",
+"id": "E",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 2,
+"weightedRankingScore": 0.5,
+"remote": "ms2"
+}
+},
+{
+"title": "Batman",
+"id": "D",
+"_federation": {
+"indexUid": "test",
+"queriesPosition": 0,
+"weightedRankingScore": 0.23106060606060605,
+"remote": "ms0"
+}
+}
+],
+"processingTimeMs": "[time]",
+"limit": 20,
+"offset": 0,
+"estimatedTotalHits": 5,
+"requestUid": "[uuid]",
+"remoteErrors": {}
+}
+"###);
+}
+
+#[actix_rt::test]
+async fn remote_auto_sharding_with_custom_metadata() {
+let ms0 = Server::new().await;
+let ms1 = Server::new().await;
+let ms2 = Server::new().await;
+
+// enable feature
+
+let (response, code) = ms0.set_features(json!({"network": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response["network"]), @"true");
+let (response, code) = ms1.set_features(json!({"network": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response["network"]), @"true");
+let (response, code) = ms2.set_features(json!({"network": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response["network"]), @"true");
+
+// set self & sharding
+
+let (response, code) = ms0.set_network(json!({"self": "ms0", "sharding": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response), @r###"
+{
+"self": "ms0",
+"remotes": {},
+"sharding": true
+}
+"###);
+let (response, code) = ms1.set_network(json!({"self": "ms1", "sharding": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response), @r###"
+{
+"self": "ms1",
+"remotes": {},
+"sharding": true
+}
+"###);
+let (response, code) = ms2.set_network(json!({"self": "ms2", "sharding": true})).await;
+snapshot!(code, @"200 OK");
+snapshot!(json_string!(response), @r###"
+{
+"self": "ms2",
+"remotes": {},
+"sharding": true
+}
+"###);
+
+// wrap servers
+let ms0 = Arc::new(ms0);
+let ms1 = Arc::new(ms1);
+let ms2 = Arc::new(ms2);
+
+let rms0 = LocalMeili::new(ms0.clone()).await;
+let rms1 = LocalMeili::new(ms1.clone()).await;
+let rms2 = LocalMeili::new(ms2.clone()).await;
+
+// set network
+let network = json!({"remotes": {
+"ms0": {
+"url": rms0.url()
+},
+"ms1": {
+"url": rms1.url()
+},
+"ms2": {
+"url": rms2.url()
+}
+}});
+
+println!("{}", serde_json::to_string_pretty(&network).unwrap());
+
+let (_response, status_code) = ms0.set_network(network.clone()).await;
+snapshot!(status_code, @"200 OK");
+let (_response, status_code) = ms1.set_network(network.clone()).await;
+snapshot!(status_code, @"200 OK");
+let (_response, status_code) = ms2.set_network(network.clone()).await;
+snapshot!(status_code, @"200 OK");
+
+// add documents
+let documents = SCORE_DOCUMENTS.clone();
+let documents = documents.as_array().unwrap();
+let index0 = ms0.index("test");
+let _index1 = ms1.index("test");
+let _index2 = ms2.index("test");
+
+let (task, _status_code) = index0
+.add_documents_with_custom_metadata(
+json!(documents),
+None,
+Some("remote_auto_sharding_with_custom_metadata"),
+)
+.await;
+
+let t0 = task.uid();
+let (t, _) = ms0.get_task(task.uid()).await;
+let t1 = t["network"]["remote_tasks"]["ms1"]["taskUid"].as_u64().unwrap();
+let t2 = t["network"]["remote_tasks"]["ms2"]["taskUid"].as_u64().unwrap();
+
+let t = ms0.wait_task(t0).await.succeeded();
+snapshot!(t, @r###"
+{
+"uid": "[uid]",
+"batchUid": "[batch_uid]",
+"indexUid": "test",
+"status": "succeeded",
+"type": "documentAdditionOrUpdate",
+"canceledBy": null,
+"details": {
+"receivedDocuments": 5,
+"indexedDocuments": 1
+},
+"error": null,
+"duration": "[duration]",
+"enqueuedAt": "[date]",
+"startedAt": "[date]",
+"finishedAt": "[date]",
+"network": {
+"remote_tasks": {
+"ms1": {
+"taskUid": 0,
+"error": null
+},
+"ms2": {
+"taskUid": 0,
+"error": null
+}
+}
+},
+"customMetadata": "remote_auto_sharding_with_custom_metadata"
+}
+"###);
+
+let t = ms1.wait_task(t1).await.succeeded();
+snapshot!(t, @r###"
+{
+"uid": "[uid]",
+"batchUid": "[batch_uid]",
+"indexUid": "test",
+"status": "succeeded",
+"type": "documentAdditionOrUpdate",
+"canceledBy": null,
+"details": {
+"receivedDocuments": 5,
+"indexedDocuments": 2
+},
+"error": null,
+"duration": "[duration]",
+"enqueuedAt": "[date]",
+"startedAt": "[date]",
+"finishedAt": "[date]",
+"network": {
+"origin": {
+"remoteName": "ms0",
+"taskUid": 0
+}
+},
+"customMetadata": "remote_auto_sharding_with_custom_metadata"
+}
+"###);
+
+let t = ms2.wait_task(t2).await.succeeded();
+snapshot!(t, @r###"
+{
+"uid": "[uid]",
+"batchUid": "[batch_uid]",
+"indexUid": "test",
+"status": "succeeded",
+"type": "documentAdditionOrUpdate",
+"canceledBy": null,
+"details": {
+"receivedDocuments": 5,
+"indexedDocuments": 2
+},
+"error": null,
+"duration": "[duration]",
+"enqueuedAt": "[date]",
+"startedAt": "[date]",
+"finishedAt": "[date]",
+"network": {
+"origin": {
+"remoteName": "ms0",
+"taskUid": 0
+}
+},
+"customMetadata": "remote_auto_sharding_with_custom_metadata"
+}
+"###);
+}

@@ -82,7 +82,7 @@ async fn cli_only() {

let (webhooks, code) = server.get_webhooks().await;
snapshot!(code, @"200 OK");
-snapshot!(webhooks, @r#"
+snapshot!(webhooks, @r###"
{
"results": [
{

@@ -90,12 +90,12 @@ async fn cli_only() {
"isEditable": false,
"url": "https://example-cli.com/",
"headers": {
-"Authorization": "Bearer a-secret-token"
+"Authorization": "Bearer a-XXXX..."
}
}
]
}
-"#);
+"###);
}

#[actix_web::test]

@@ -233,7 +233,7 @@ async fn cli_with_dumps() {

let (webhooks, code) = server.get_webhooks().await;
snapshot!(code, @"200 OK");
-snapshot!(webhooks, @r#"
+snapshot!(webhooks, @r###"
{
"results": [
{

@@ -241,7 +241,7 @@ async fn cli_with_dumps() {
"isEditable": false,
"url": "http://defined-in-test-cli.com/",
"headers": {
-"Authorization": "Bearer a-secret-token-defined-in-test-cli"
+"Authorization": "Bearer a-secXXXXXX..."
}
},
{

@@ -255,7 +255,7 @@ async fn cli_with_dumps() {
"isEditable": true,
"url": "https://example.com/hook",
"headers": {
-"authorization": "TOKEN"
+"authorization": "XXX..."
}
},
{

@@ -266,7 +266,7 @@ async fn cli_with_dumps() {
}
]
}
-"#);
+"###);
}

#[actix_web::test]

@@ -367,30 +367,30 @@ async fn post_get_delete() {
}))
.await;
snapshot!(code, @"201 Created");
-snapshot!(json_string!(value, { ".uuid" => "[uuid]" }), @r#"
+snapshot!(json_string!(value, { ".uuid" => "[uuid]" }), @r###"
{
"uuid": "[uuid]",
"isEditable": true,
"url": "https://example.com/hook",
"headers": {
-"authorization": "TOKEN"
+"authorization": "XXX..."
}
}
-"#);
+"###);

let uuid = value.get("uuid").unwrap().as_str().unwrap();
let (value, code) = server.get_webhook(uuid).await;
snapshot!(code, @"200 OK");
-snapshot!(json_string!(value, { ".uuid" => "[uuid]" }), @r#"
+snapshot!(json_string!(value, { ".uuid" => "[uuid]" }), @r###"
{
"uuid": "[uuid]",
"isEditable": true,
"url": "https://example.com/hook",
"headers": {
-"authorization": "TOKEN"
+"authorization": "XXX..."
}
}
-"#);
+"###);

let (_value, code) = server.delete_webhook(uuid).await;
snapshot!(code, @"204 No Content");

@@ -430,31 +430,31 @@ async fn create_and_patch() {
let (value, code) =
server.patch_webhook(&uuid, json!({ "headers": { "authorization": "TOKEN" } })).await;
snapshot!(code, @"200 OK");
-snapshot!(json_string!(value, { ".uuid" => "[uuid]" }), @r#"
+snapshot!(json_string!(value, { ".uuid" => "[uuid]" }), @r###"
{
"uuid": "[uuid]",
"isEditable": true,
"url": "https://example.com/hook",
"headers": {
-"authorization": "TOKEN"
+"authorization": "XXX..."
}
}
-"#);
+"###);

let (value, code) =
server.patch_webhook(&uuid, json!({ "headers": { "authorization2": "TOKEN" } })).await;
snapshot!(code, @"200 OK");
-snapshot!(json_string!(value, { ".uuid" => "[uuid]" }), @r#"
+snapshot!(json_string!(value, { ".uuid" => "[uuid]" }), @r###"
{
"uuid": "[uuid]",
"isEditable": true,
"url": "https://example.com/hook",
"headers": {
-"authorization": "TOKEN",
+"authorization": "XXX...",
"authorization2": "TOKEN"
}
}
-"#);
+"###);

let (value, code) =
server.patch_webhook(&uuid, json!({ "headers": { "authorization": null } })).await;
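
The snapshots above show webhook Authorization-style headers being rendered with a short prefix followed by "XXX...". The masking code itself is not part of this compare view; purely as an illustration of the idea, a minimal sketch (the exact prefix length used by Meilisearch differs):

fn mask_header_value(value: &str) -> String {
    // Keep a short, non-sensitive prefix of the secret and elide the rest.
    let keep = value.len() / 3;
    let shown: String = value.chars().take(keep).collect();
    format!("{shown}XXX...")
}

fn main() {
    // e.g. prints "Bearer XXX..." with this sketch; the real redaction in the
    // snapshots above keeps a slightly longer prefix.
    println!("{}", mask_header_value("Bearer a-secret-token"));
}
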
@@ -656,3 +656,119 @@ async fn forbidden_fields() {
}
"#);
}

+#[actix_web::test]
+async fn receive_custom_metadata() {
+let WebhookHandle { server_handle: handle1, url: url1, receiver: mut receiver1 } =
+create_webhook_server().await;
+let WebhookHandle { server_handle: handle2, url: url2, receiver: mut receiver2 } =
+create_webhook_server().await;
+let WebhookHandle { server_handle: handle3, url: url3, receiver: mut receiver3 } =
+create_webhook_server().await;
+
+let db_path = tempfile::tempdir().unwrap();
+let server = Server::new_with_options(Opt {
+task_webhook_url: Some(Url::parse(&url3).unwrap()),
+..default_settings(db_path.path())
+})
+.await
+.unwrap();
+
+for url in [url1, url2] {
+let (value, code) = server.create_webhook(json!({ "url": url })).await;
+snapshot!(code, @"201 Created");
+snapshot!(json_string!(value, { ".uuid" => "[uuid]", ".url" => "[ignored]" }), @r#"
+{
+"uuid": "[uuid]",
+"isEditable": true,
+"url": "[ignored]",
+"headers": {}
+}
+"#);
+}
+let index = server.index("tamo");
+let (response, code) = index
+.add_documents_with_custom_metadata(
+json!({ "id": 1, "doggo": "bone" }),
+None,
+Some("test_meta"),
+)
+.await;
+
+snapshot!(response, @r###"
+{
+"taskUid": 0,
+"indexUid": "tamo",
+"status": "enqueued",
+"type": "documentAdditionOrUpdate",
+"enqueuedAt": "[date]",
+"customMetadata": "test_meta"
+}
+"###);
+snapshot!(code, @"202 Accepted");
+
+let mut count1 = 0;
+let mut count2 = 0;
+let mut count3 = 0;
+while count1 == 0 || count2 == 0 || count3 == 0 {
+tokio::select! {
+msg = receiver1.recv() => {
+if let Some(msg) = msg {
+count1 += 1;
+check_metadata(msg);
+}
+},
+msg = receiver2.recv() => {
+if let Some(msg) = msg {
+count2 += 1;
+check_metadata(msg);
+}
+},
+msg = receiver3.recv() => {
+if let Some(msg) = msg {
+count3 += 1;
+check_metadata(msg);
+}
+},
+}
+}
+
+assert_eq!(count1, 1);
+assert_eq!(count2, 1);
+assert_eq!(count3, 1);
+
+handle1.abort();
+handle2.abort();
+handle3.abort();
+}
+
+fn check_metadata(msg: Vec<u8>) {
+let msg = String::from_utf8(msg).unwrap();
+let tasks = msg.split('\n');
+for task in tasks {
+if task.is_empty() {
+continue;
+}
+let task: serde_json::Value = serde_json::from_str(task).unwrap();
+snapshot!(common::Value(task), @r###"
+{
+"uid": "[uid]",
+"batchUid": "[batch_uid]",
+"indexUid": "tamo",
+"status": "succeeded",
+"type": "documentAdditionOrUpdate",
+"canceledBy": null,
+"details": {
+"receivedDocuments": 1,
+"indexedDocuments": 1
+},
+"error": null,
+"duration": "[duration]",
+"enqueuedAt": "[date]",
+"startedAt": "[date]",
+"finishedAt": "[date]",
+"customMetadata": "test_meta"
+}
+"###);
+}
+}

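The webhook payload checked by `check_metadata` above is newline-delimited JSON, one task per line. A self-contained sketch of the same parsing idea, independent of the test harness (names here are illustrative, not from the diff):

// Requires the serde_json crate.
fn parse_tasks(payload: &[u8]) -> Vec<serde_json::Value> {
    String::from_utf8_lossy(payload)
        .lines()
        .filter(|line| !line.is_empty())
        .map(|line| serde_json::from_str(line).expect("each line is one JSON task"))
        .collect()
}

fn main() {
    let payload = b"{\"uid\":0,\"customMetadata\":\"test_meta\"}\n";
    let tasks = parse_tasks(payload);
    assert_eq!(tasks[0]["customMetadata"], "test_meta");
}
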
@@ -43,7 +43,7 @@ async fn version_too_old() {
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
-snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.24.0");
+snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.26.0");
}

#[actix_rt::test]

@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
-snapshot!(err, @"Database version 1.24.1 is higher than the Meilisearch version 1.24.0. Downgrade is not supported");
+snapshot!(err, @"Database version 1.26.1 is higher than the Meilisearch version 1.26.0. Downgrade is not supported");
}

#[actix_rt::test]

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
-"upgradeTo": "v1.24.0"
+"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
-"upgradeTo": "v1.24.0"
+"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
-"upgradeTo": "v1.24.0"
+"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
-"upgradeTo": "v1.24.0"
+"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
-"upgradeTo": "v1.24.0"
+"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
-"upgradeTo": "v1.24.0"
+"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
-"upgradeTo": "v1.24.0"
+"upgradeTo": "v1.26.0"
},
"stats": {
"totalNbTasks": 1,

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
-"upgradeTo": "v1.24.0"
+"upgradeTo": "v1.26.0"
},
"error": null,
"duration": "[duration]",

@@ -68,7 +68,7 @@ fn convert_update_files(db_path: &Path) -> anyhow::Result<()> {

for uuid in file_store.all_uuids().context("while retrieving uuids from file store")? {
let uuid = uuid.context("while retrieving uuid from file store")?;
-let update_file_path = file_store.get_update_path(uuid);
+let update_file_path = file_store.update_path(uuid);
let update_file = file_store
.get_update(uuid)
.with_context(|| format!("while getting update file for uuid {uuid:?}"))?;

@@ -18,7 +18,7 @@ bincode = "1.3.3"
bstr = "1.12.0"
bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
-charabia = { version = "0.9.7", default-features = false }
+charabia = { version = "0.9.8", default-features = false }
cellulite = "0.3.1-nested-rtxns-2"
concat-arrays = "0.1.2"
convert_case = "0.8.0"

@@ -34,7 +34,7 @@ grenad = { version = "0.5.0", default-features = false, features = [
"rayon",
"tempfile",
] }
-heed = { version = "0.22.1-nested-rtxns", default-features = false, features = [
+heed = { version = "0.22.1-nested-rtxns-6", default-features = false, features = [
"serde-json",
"serde-bincode",
] }

@@ -74,12 +74,13 @@ csv = "1.3.1"
candle-core = { version = "0.9.1" }
candle-transformers = { version = "0.9.1" }
candle-nn = { version = "0.9.1" }
-tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
+tokenizers = { version = "0.22.1", default-features = false, features = [
"onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
"online",
] }
+safetensors = "0.6.2"
tiktoken-rs = "0.7.0"
liquid = "0.26.11"
rhai = { version = "1.22.2", features = [

@@ -100,7 +101,6 @@ bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
steppe = { version = "0.4", default-features = false }
thread_local = "1.1.9"
-allocator-api2 = "0.3.0"
rustc-hash = "2.1.1"
enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }

@@ -425,6 +425,10 @@ impl Index {
self.env.info().map_size
}

+pub fn try_clone_inner_file(&self) -> heed::Result<File> {
+self.env.try_clone_inner_file()
+}
+
pub fn copy_to_file(&self, file: &mut File, option: CompactionOption) -> Result<()> {
self.env.copy_to_file(file, option).map_err(Into::into)
}

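Illustrative use of the new helper (not taken from this diff): obtain an independent handle on the index's underlying data file, for example to read its size or stream it elsewhere, without copying the environment first.

// Sketch only; `index` is an already-opened milli Index.
let file: std::fs::File = index.try_clone_inner_file()?;
let size_on_disk = file.metadata()?.len();
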
@@ -1173,6 +1173,7 @@ pub fn extract_embeddings_from_fragments<R: io::Read + io::Seek>(
request_threads,
&doc_alloc,
embedder_stats,
+false,
on_embed,
);

@@ -1,3 +1,6 @@
|
|||||||
|
use std::num::NonZeroUsize;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
|
|
||||||
use super::GrenadParameters;
|
use super::GrenadParameters;
|
||||||
@@ -20,6 +23,7 @@ pub struct IndexerConfig {
|
|||||||
pub experimental_no_edition_2024_for_dumps: bool,
|
pub experimental_no_edition_2024_for_dumps: bool,
|
||||||
pub experimental_no_edition_2024_for_prefix_post_processing: bool,
|
pub experimental_no_edition_2024_for_prefix_post_processing: bool,
|
||||||
pub experimental_no_edition_2024_for_facet_post_processing: bool,
|
pub experimental_no_edition_2024_for_facet_post_processing: bool,
|
||||||
|
pub s3_snapshot_options: Option<S3SnapshotOptions>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IndexerConfig {
|
impl IndexerConfig {
|
||||||
@@ -37,6 +41,20 @@ impl IndexerConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct S3SnapshotOptions {
|
||||||
|
pub s3_bucket_url: String,
|
||||||
|
pub s3_bucket_region: String,
|
||||||
|
pub s3_bucket_name: String,
|
||||||
|
pub s3_snapshot_prefix: String,
|
||||||
|
pub s3_access_key: String,
|
||||||
|
pub s3_secret_key: String,
|
||||||
|
pub s3_max_in_flight_parts: NonZeroUsize,
|
||||||
|
pub s3_compression_level: u32,
|
||||||
|
pub s3_signature_duration: Duration,
|
||||||
|
pub s3_multipart_part_size: u64,
|
||||||
|
}
|
||||||
|
|
||||||
/// By default use only 1 thread for indexing in tests
|
/// By default use only 1 thread for indexing in tests
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub fn default_thread_pool_and_threads() -> (ThreadPoolNoAbort, Option<usize>) {
|
pub fn default_thread_pool_and_threads() -> (ThreadPoolNoAbort, Option<usize>) {
|
||||||
@@ -76,6 +94,7 @@ impl Default for IndexerConfig {
|
|||||||
experimental_no_edition_2024_for_dumps: false,
|
experimental_no_edition_2024_for_dumps: false,
|
||||||
experimental_no_edition_2024_for_prefix_post_processing: false,
|
experimental_no_edition_2024_for_prefix_post_processing: false,
|
||||||
experimental_no_edition_2024_for_facet_post_processing: false,
|
experimental_no_edition_2024_for_facet_post_processing: false,
|
||||||
|
s3_snapshot_options: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
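
The new `S3SnapshotOptions` struct groups every parameter needed for S3-backed snapshots into one value carried by `IndexerConfig`. A minimal sketch of how a caller might fill it in; every value below is an illustrative placeholder, not taken from this changeset:

    use std::num::NonZeroUsize;
    use std::time::Duration;

    // Hypothetical values, for illustration only.
    let s3_options = S3SnapshotOptions {
        s3_bucket_url: "https://s3.example.com".to_string(),
        s3_bucket_region: "eu-central-1".to_string(),
        s3_bucket_name: "my-snapshots".to_string(),
        s3_snapshot_prefix: "meilisearch".to_string(),
        s3_access_key: "<access-key>".to_string(),
        s3_secret_key: "<secret-key>".to_string(),
        s3_max_in_flight_parts: NonZeroUsize::new(4).unwrap(),
        s3_compression_level: 0,
        s3_signature_duration: Duration::from_secs(3600),
        s3_multipart_part_size: 50 * 1024 * 1024,
    };
    // `IndexerConfig` keeps its `Default` impl, so the rest of the fields can be defaulted.
    let config = IndexerConfig { s3_snapshot_options: Some(s3_options), ..Default::default() };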

@@ -5,7 +5,7 @@ pub use self::concurrent_available_ids::ConcurrentAvailableIds;
 pub use self::facet::bulk::FacetsUpdateBulk;
 pub use self::facet::incremental::FacetsUpdateIncrementalInner;
 pub use self::index_documents::{request_threads, *};
-pub use self::indexer_config::{default_thread_pool_and_threads, IndexerConfig};
+pub use self::indexer_config::{default_thread_pool_and_threads, IndexerConfig, S3SnapshotOptions};
 pub use self::new::ChannelCongestion;
 pub use self::settings::{validate_embedding_settings, Setting, Settings};
 pub use self::update_step::UpdateIndexingStep;

@@ -35,6 +35,7 @@ pub struct EmbeddingExtractor<'a, 'b> {
     possible_embedding_mistakes: PossibleEmbeddingMistakes,
     embedder_stats: &'a EmbedderStats,
     threads: &'a ThreadPoolNoAbort,
+    failure_modes: EmbedderFailureModes,
 }
 
 impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
@@ -46,7 +47,15 @@ impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
         threads: &'a ThreadPoolNoAbort,
     ) -> Self {
         let possible_embedding_mistakes = PossibleEmbeddingMistakes::new(field_distribution);
-        Self { embedders, sender, threads, possible_embedding_mistakes, embedder_stats }
+        let failure_modes = EmbedderFailureModes::from_env();
+        Self {
+            embedders,
+            sender,
+            threads,
+            possible_embedding_mistakes,
+            embedder_stats,
+            failure_modes,
+        }
     }
 }
 
@@ -91,6 +100,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
             self.threads,
             self.sender,
             &context.doc_alloc,
+            self.failure_modes,
         ))
     }
 
@@ -267,6 +277,7 @@ pub struct SettingsChangeEmbeddingExtractor<'a, 'b, SD> {
     sender: EmbeddingSender<'a, 'b>,
     possible_embedding_mistakes: PossibleEmbeddingMistakes,
     threads: &'a ThreadPoolNoAbort,
+    failure_modes: EmbedderFailureModes,
 }
 
 impl<'a, 'b, SD: SettingsDelta> SettingsChangeEmbeddingExtractor<'a, 'b, SD> {
@@ -279,7 +290,16 @@ impl<'a, 'b, SD: SettingsDelta> SettingsChangeEmbeddingExtractor<'a, 'b, SD> {
         threads: &'a ThreadPoolNoAbort,
     ) -> Self {
         let possible_embedding_mistakes = PossibleEmbeddingMistakes::new(field_distribution);
-        Self { settings_delta, embedder_stats, sender, threads, possible_embedding_mistakes }
+        let failure_modes = EmbedderFailureModes::from_env();
+
+        Self {
+            settings_delta,
+            embedder_stats,
+            sender,
+            threads,
+            possible_embedding_mistakes,
+            failure_modes,
+        }
     }
 }
 
@@ -336,6 +356,7 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
                 self.threads,
                 self.sender,
                 &context.doc_alloc,
+                self.failure_modes,
             ),
             reindex_action,
         ));
@@ -539,6 +560,7 @@ struct Chunks<'a, 'b, 'extractor> {
 enum ChunkType<'a, 'b> {
     DocumentTemplate {
         document_template: &'a Prompt,
+        ignore_document_template_failures: bool,
         session: EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, &'a str>,
     },
     Fragments {
@@ -559,6 +581,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
         threads: &'a ThreadPoolNoAbort,
         sender: EmbeddingSender<'a, 'b>,
         doc_alloc: &'a Bump,
+        failure_modes: EmbedderFailureModes,
     ) -> Self {
         let embedder = &runtime.embedder;
         let dimensions = embedder.dimensions();
@@ -567,12 +590,14 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
         let kind = if fragments.is_empty() {
             ChunkType::DocumentTemplate {
                 document_template: &runtime.document_template,
+                ignore_document_template_failures: failure_modes.ignore_document_template_failures,
                 session: EmbedSession::new(
                     &runtime.embedder,
                     embedder_name,
                     threads,
                     doc_alloc,
                     embedder_stats,
+                    failure_modes.ignore_embedder_failures,
                     OnEmbeddingDocumentUpdates {
                         embedder_id: embedder_info.embedder_id,
                         sender,
@@ -589,6 +614,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                     threads,
                     doc_alloc,
                     embedder_stats,
+                    failure_modes.ignore_embedder_failures,
                     OnEmbeddingDocumentUpdates {
                         embedder_id: embedder_info.embedder_id,
                         sender,
@@ -693,7 +719,11 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                     },
                 )?;
             }
-            ChunkType::DocumentTemplate { document_template, session } => {
+            ChunkType::DocumentTemplate {
+                document_template,
+                ignore_document_template_failures,
+                session,
+            } => {
                 let doc_alloc = session.doc_alloc();
 
                 let old_embedder = settings_delta.old_embedders().get(session.embedder_name());
@@ -702,6 +732,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                 } else {
                     old_embedder.as_ref().map(|old_embedder| &old_embedder.document_template)
                 };
+
                 let extractor =
                     DocumentTemplateExtractor::new(document_template, doc_alloc, fields_ids_map);
                 let old_extractor = old_document_template.map(|old_document_template| {
@@ -710,7 +741,15 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                 let metadata =
                     Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
 
-                match extractor.diff_settings(document, &external_docid, old_extractor.as_ref())? {
+                let extractor_diff = if *ignore_document_template_failures {
+                    let extractor = extractor.ignore_errors();
+                    let old_extractor = old_extractor.map(DocumentTemplateExtractor::ignore_errors);
+                    extractor.diff_settings(document, &external_docid, old_extractor.as_ref())?
+                } else {
+                    extractor.diff_settings(document, &external_docid, old_extractor.as_ref())?
+                };
+
+                match extractor_diff {
                     ExtractorDiff::Removed => {
                         if old_is_user_provided || full_reindex {
                             session.on_embed_mut().clear_vectors(docid);
@@ -758,7 +797,11 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
             new_must_regenerate,
         );
         match &mut self.kind {
-            ChunkType::DocumentTemplate { document_template, session } => {
+            ChunkType::DocumentTemplate {
+                document_template,
+                ignore_document_template_failures,
+                session,
+            } => {
                 let doc_alloc = session.doc_alloc();
                 let ex = DocumentTemplateExtractor::new(
                     document_template,
@@ -766,18 +809,33 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                     new_fields_ids_map,
                 );
 
-                update_autogenerated(
-                    docid,
-                    external_docid,
-                    [ex],
-                    old_document,
-                    new_document,
-                    &external_docid,
-                    old_must_regenerate,
-                    old_is_user_provided,
-                    session,
-                    unused_vectors_distribution,
-                )?
+                if *ignore_document_template_failures {
+                    update_autogenerated(
+                        docid,
+                        external_docid,
+                        [ex.ignore_errors()],
+                        old_document,
+                        new_document,
+                        &external_docid,
+                        old_must_regenerate,
+                        old_is_user_provided,
+                        session,
+                        unused_vectors_distribution,
+                    )
+                } else {
+                    update_autogenerated(
+                        docid,
+                        external_docid,
+                        [ex],
+                        old_document,
+                        new_document,
+                        &external_docid,
+                        old_must_regenerate,
+                        old_is_user_provided,
+                        session,
+                        unused_vectors_distribution,
+                    )
+                }?
             }
             ChunkType::Fragments { fragments, session } => {
                 let doc_alloc = session.doc_alloc();
@@ -844,23 +902,38 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
         );
 
         match &mut self.kind {
-            ChunkType::DocumentTemplate { document_template, session } => {
+            ChunkType::DocumentTemplate {
+                document_template,
+                ignore_document_template_failures,
+                session,
+            } => {
                 let doc_alloc = session.doc_alloc();
                 let ex = DocumentTemplateExtractor::new(
                     document_template,
                     doc_alloc,
                     new_fields_ids_map,
                 );
+                if *ignore_document_template_failures {
                     insert_autogenerated(
                         docid,
                         external_docid,
-                        [ex],
+                        [ex.ignore_errors()],
                         new_document,
                         &external_docid,
                         session,
                         unused_vectors_distribution,
                     )?;
+                } else {
+                    insert_autogenerated(
+                        docid,
+                        external_docid,
+                        [ex],
+                        new_document,
+                        &external_docid,
+                        session,
+                        unused_vectors_distribution,
+                    )?;
+                }
             }
             ChunkType::Fragments { fragments, session } => {
                 let doc_alloc = session.doc_alloc();
@@ -884,7 +957,11 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
 
     pub fn drain(self, unused_vectors_distribution: &UnusedVectorsDistributionBump) -> Result<()> {
         match self.kind {
-            ChunkType::DocumentTemplate { document_template: _, session } => {
+            ChunkType::DocumentTemplate {
+                document_template: _,
+                ignore_document_template_failures: _,
+                session,
+            } => {
                 session.drain(unused_vectors_distribution)?;
             }
             ChunkType::Fragments { fragments: _, session } => {
@@ -896,9 +973,11 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
 
     pub fn embedder_name(&self) -> &'a str {
         match &self.kind {
-            ChunkType::DocumentTemplate { document_template: _, session } => {
-                session.embedder_name()
-            }
+            ChunkType::DocumentTemplate {
+                document_template: _,
+                ignore_document_template_failures: _,
+                session,
+            } => session.embedder_name(),
             ChunkType::Fragments { fragments: _, session } => session.embedder_name(),
         }
     }
@@ -967,7 +1046,11 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
             }
         }
         match &mut self.kind {
-            ChunkType::DocumentTemplate { document_template: _, session } => {
+            ChunkType::DocumentTemplate {
+                document_template: _,
+                ignore_document_template_failures: _,
+                session,
+            } => {
                 session.on_embed_mut().process_embeddings(
                     Metadata { docid, external_docid, extractor_id: 0 },
                     embeddings,
@@ -1078,3 +1161,41 @@ where
 
     Ok(())
 }
+
+#[derive(Clone, Copy, PartialEq, Eq, Default)]
+struct EmbedderFailureModes {
+    pub ignore_document_template_failures: bool,
+    pub ignore_embedder_failures: bool,
+}
+
+impl EmbedderFailureModes {
+    fn from_env() -> Self {
+        match std::env::var("MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES") {
+            Ok(failure_modes) => Self::parse_from_str(
+                &failure_modes,
+                "`MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES`",
+            ),
+            Err(std::env::VarError::NotPresent) => Self::default(),
+            Err(std::env::VarError::NotUnicode(_)) => panic!(
+                "`MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES` contains a non-unicode value"
+            ),
+        }
+    }
+
+    fn parse_from_str(failure_modes: &str, provenance: &'static str) -> Self {
+        let Self { mut ignore_document_template_failures, mut ignore_embedder_failures } =
+            Default::default();
+        for segment in failure_modes.split(',') {
+            let segment = segment.trim();
+            match segment {
+                "ignore_document_template_failures" => {
+                    ignore_document_template_failures = true;
+                }
+                "ignore_embedder_failures" => ignore_embedder_failures = true,
+                "" => continue,
+                segment => panic!("Unrecognized segment value for {provenance}: {segment}"),
+            }
+        }
+        Self { ignore_document_template_failures, ignore_embedder_failures }
+    }
+}
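
The extractor reads its failure policy from the `MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES` variable, a comma-separated list whose only recognized segments are `ignore_document_template_failures` and `ignore_embedder_failures`. A minimal sketch of how a value is interpreted, assuming direct access to the private helper shown above (illustrative only):

    // Hypothetical check mirroring `EmbedderFailureModes::parse_from_str`.
    let modes = EmbedderFailureModes::parse_from_str(
        "ignore_document_template_failures, ignore_embedder_failures",
        "`MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES`",
    );
    assert!(modes.ignore_document_template_failures);
    assert!(modes.ignore_embedder_failures);

Any other segment makes the parser panic, and an unset variable leaves both flags at their `false` defaults.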

@@ -4,7 +4,7 @@ use std::io::{BufReader, BufWriter, Read, Seek, Write};
 use std::iter;
 
 use hashbrown::HashMap;
-use heed::types::{Bytes, DecodeIgnore};
+use heed::types::{Bytes, DecodeIgnore, Str};
 use heed::{BytesDecode, Database, Error, RoTxn, RwTxn};
 use rayon::iter::{IndexedParallelIterator as _, IntoParallelIterator, ParallelIterator as _};
 use roaring::MultiOps;
@@ -16,22 +16,29 @@ use crate::heed_codec::StrBEU16Codec;
 use crate::update::GrenadParameters;
 use crate::{CboRoaringBitmapCodec, Index, Prefix, Result};
 
-struct WordPrefixDocids {
+struct WordPrefixDocids<'i> {
+    index: &'i Index,
     database: Database<Bytes, CboRoaringBitmapCodec>,
     prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
     max_memory_by_thread: Option<usize>,
+    /// Do not use an experimental LMDB feature to read uncommitted data in parallel.
+    no_experimental_post_processing: bool,
 }
 
-impl WordPrefixDocids {
+impl<'i> WordPrefixDocids<'i> {
     fn new(
+        index: &'i Index,
         database: Database<Bytes, CboRoaringBitmapCodec>,
         prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
        grenad_parameters: &GrenadParameters,
-    ) -> WordPrefixDocids {
+    ) -> WordPrefixDocids<'i> {
         WordPrefixDocids {
+            index,
             database,
             prefix_database,
             max_memory_by_thread: grenad_parameters.max_memory_by_thread(),
+            no_experimental_post_processing: grenad_parameters
+                .experimental_no_edition_2024_for_prefix_post_processing,
         }
     }
 
@@ -42,7 +49,77 @@ impl WordPrefixDocids {
         prefix_to_delete: &BTreeSet<Prefix>,
     ) -> Result<()> {
         delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
-        self.recompute_modified_prefixes(wtxn, prefix_to_compute)
+        if self.no_experimental_post_processing {
+            self.recompute_modified_prefixes(wtxn, prefix_to_compute)
+        } else {
+            self.recompute_modified_prefixes_no_frozen(wtxn, prefix_to_compute)
+        }
+    }
+
+    #[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
+    fn recompute_modified_prefixes_no_frozen(
+        &self,
+        wtxn: &mut RwTxn,
+        prefix_to_compute: &BTreeSet<Prefix>,
+    ) -> Result<()> {
+        let thread_count = rayon::current_num_threads();
+        let rtxns = iter::repeat_with(|| self.index.env.nested_read_txn(wtxn))
+            .take(thread_count)
+            .collect::<heed::Result<Vec<_>>>()?;
+
+        let outputs = rtxns
+            .into_par_iter()
+            .enumerate()
+            .map(|(thread_id, rtxn)| {
+                // `indexes` represent offsets at which prefixes computations were stored in the `file`.
+                let mut indexes = Vec::new();
+                let mut file = BufWriter::new(spooled_tempfile(
+                    self.max_memory_by_thread.unwrap_or(usize::MAX),
+                ));
+
+                let mut buffer = Vec::new();
+                for (prefix_index, prefix) in prefix_to_compute.iter().enumerate() {
+                    // Is prefix for another thread?
+                    if prefix_index % thread_count != thread_id {
+                        continue;
+                    }
+
+                    let output = self
+                        .database
+                        .prefix_iter(&rtxn, prefix.as_bytes())?
+                        .remap_types::<Str, CboRoaringBitmapCodec>()
+                        .map(|result| result.map(|(_word, bitmap)| bitmap))
+                        .union()?;
+
+                    buffer.clear();
+                    CboRoaringBitmapCodec::serialize_into_vec(&output, &mut buffer);
+                    indexes.push(PrefixEntry { prefix, serialized_length: buffer.len() });
+                    file.write_all(&buffer)?;
+                }
+
+                Ok((indexes, file))
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        // We iterate over all the collected and serialized bitmaps through
+        // the files and entries to eventually put them in the final database.
+        let mut buffer = Vec::new();
+        for (index, file) in outputs {
+            let mut file = file.into_inner().map_err(|e| e.into_error())?;
+            file.rewind()?;
+            let mut file = BufReader::new(file);
+            for PrefixEntry { prefix, serialized_length } in index {
+                buffer.resize(serialized_length, 0);
+                file.read_exact(&mut buffer)?;
+                self.prefix_database.remap_data_type::<Bytes>().put(
+                    wtxn,
+                    prefix.as_bytes(),
+                    &buffer,
+                )?;
+            }
+        }
+
+        Ok(())
     }
 
     #[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
@@ -463,6 +540,7 @@ pub fn compute_word_prefix_docids(
     grenad_parameters: &GrenadParameters,
 ) -> Result<()> {
     WordPrefixDocids::new(
+        index,
         index.word_docids.remap_key_type(),
         index.word_prefix_docids.remap_key_type(),
         grenad_parameters,
@@ -479,6 +557,7 @@ pub fn compute_exact_word_prefix_docids(
     grenad_parameters: &GrenadParameters,
 ) -> Result<()> {
     WordPrefixDocids::new(
+        index,
         index.exact_word_docids.remap_key_type(),
         index.exact_word_prefix_docids.remap_key_type(),
         grenad_parameters,

@@ -1631,8 +1631,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 
         // Update index settings
         let embedding_config_updates = self.update_embedding_configs()?;
+        self.update_user_defined_searchable_attributes()?;
 
-        let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
+        let mut new_inner_settings =
+            InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
+        new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
 
         let primary_key_id = self
             .index

@@ -42,6 +42,8 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
     &ToTargetNoOp { target: (1, 22, 0) },
     &ToTargetNoOp { target: (1, 23, 0) },
     &ToTargetNoOp { target: (1, 24, 0) },
+    &ToTargetNoOp { target: (1, 25, 0) },
+    &ToTargetNoOp { target: (1, 26, 0) },
     // This is the last upgrade function, it will be called when the index is up to date.
     // any other upgrade function should be added before this one.
     &ToCurrentNoOp {},
@@ -77,6 +79,8 @@ const fn start(from: (u32, u32, u32)) -> Option<usize> {
         (1, 22, _) => function_index!(12),
         (1, 23, _) => function_index!(13),
         (1, 24, _) => function_index!(14),
+        (1, 25, _) => function_index!(15),
+        (1, 26, _) => function_index!(16),
         // We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here to force manually
         // considering dumpless upgrade.
         (_major, _minor, _patch) => return None,

@@ -1,9 +1,11 @@
 use candle_core::Tensor;
 use candle_nn::VarBuilder;
-use candle_transformers::models::bert::{BertModel, Config, DTYPE};
+use candle_transformers::models::bert::{BertModel, Config as BertConfig, DTYPE};
+use candle_transformers::models::modernbert::{Config as ModernConfig, ModernBert};
 // FIXME: currently we'll be using the hub to retrieve model, in the future we might want to embed it into Meilisearch itself
 use hf_hub::api::sync::Api;
 use hf_hub::{Repo, RepoType};
+use safetensors::SafeTensors;
 use tokenizers::{PaddingParams, Tokenizer};
 
 use super::EmbeddingCache;
@@ -84,14 +86,21 @@ impl Default for EmbedderOptions {
     }
 }
 
+enum ModelKind {
+    Bert(BertModel),
+    Modern(ModernBert),
+}
+
 /// Perform embedding of documents and queries
 pub struct Embedder {
-    model: BertModel,
+    model: ModelKind,
     tokenizer: Tokenizer,
     options: EmbedderOptions,
     dimensions: usize,
     pooling: Pooling,
     cache: EmbeddingCache,
+    device: candle_core::Device,
+    max_len: usize,
 }
 
 impl std::fmt::Debug for Embedder {
@@ -101,10 +110,60 @@ impl std::fmt::Debug for Embedder {
             .field("tokenizer", &self.tokenizer)
             .field("options", &self.options)
             .field("pooling", &self.pooling)
+            .field("device", &self.device)
+            .field("max_len", &self.max_len)
             .finish()
     }
 }
 
+// some models do not have the "model." prefix in their safetensors weights
+fn change_tensor_names(
+    weights_path: &std::path::Path,
+) -> Result<std::path::PathBuf, NewEmbedderError> {
+    let data = std::fs::read(weights_path)
+        .map_err(|e| NewEmbedderError::safetensor_weight(candle_core::Error::Io(e)))?;
+
+    let tensors = SafeTensors::deserialize(&data)
+        .map_err(|e| NewEmbedderError::safetensor_weight(candle_core::Error::Msg(e.to_string())))?;
+
+    let names = tensors.names();
+    let has_model_prefix = names.iter().any(|n| n.starts_with("model."));
+
+    if has_model_prefix {
+        return Ok(weights_path.to_path_buf());
+    }
+
+    let fixed_path = weights_path.with_extension("fixed.safetensors");
+
+    if fixed_path.exists() {
+        return Ok(fixed_path);
+    }
+
+    let mut new_tensors = vec![];
+    for name in names {
+        let tensor_view = tensors.tensor(name).map_err(|e| {
+            NewEmbedderError::safetensor_weight(candle_core::Error::Msg(e.to_string()))
+        })?;
+
+        let new_name = format!("model.{}", name);
+        let data_offset = tensor_view.data();
+        let shape = tensor_view.shape();
+        let dtype = tensor_view.dtype();
+
+        new_tensors.push((new_name, shape.to_vec(), dtype, data_offset));
+    }
+
+    use safetensors::tensor::TensorView;
+    let views = new_tensors.iter().map(|(name, shape, dtype, data)| {
+        (name.as_str(), TensorView::new(*dtype, shape.clone(), data).unwrap())
+    });
+
+    safetensors::serialize_to_file(views, None, &fixed_path)
+        .map_err(|e| NewEmbedderError::safetensor_weight(candle_core::Error::Msg(e.to_string())))?;
+
+    Ok(fixed_path)
+}
+
 #[derive(Clone, Copy, serde::Deserialize)]
 struct PoolingConfig {
     #[serde(default)]
@@ -220,19 +279,42 @@ impl Embedder {
             (config, tokenizer, weights, source, pooling)
         };
 
-        let config = std::fs::read_to_string(&config_filename)
+        let config_str = std::fs::read_to_string(&config_filename)
             .map_err(|inner| NewEmbedderError::open_config(config_filename.clone(), inner))?;
-        let config: Config = serde_json::from_str(&config).map_err(|inner| {
-            NewEmbedderError::deserialize_config(
-                options.model.clone(),
-                config,
-                config_filename,
-                inner,
-            )
-        })?;
+
+        let cfg_val: serde_json::Value = match serde_json::from_str(&config_str) {
+            Ok(v) => v,
+            Err(inner) => {
+                return Err(NewEmbedderError::deserialize_config(
+                    options.model.clone(),
+                    config_str.clone(),
+                    config_filename.clone(),
+                    inner,
+                ));
+            }
+        };
+
+        let model_type = cfg_val.get("model_type").and_then(|v| v.as_str()).unwrap_or_default();
+        let arch_arr = cfg_val.get("architectures").and_then(|v| v.as_array());
+        let has_arch = |needle: &str| {
+            model_type.eq_ignore_ascii_case(needle)
+                || arch_arr.is_some_and(|arr| {
+                    arr.iter().filter_map(|v| v.as_str()).any(|s| s.to_lowercase().contains(needle))
+                })
+        };
+
+        let is_modern = has_arch("modernbert");
+        tracing::debug!(is_modern, model_type, "detected HF architecture");
+
         let mut tokenizer = Tokenizer::from_file(&tokenizer_filename)
             .map_err(|inner| NewEmbedderError::open_tokenizer(tokenizer_filename, inner))?;
+
+        let weights_filename = if is_modern && weight_source == WeightSource::Safetensors {
+            change_tensor_names(&weights_filename)?
+        } else {
+            weights_filename
+        };
+
         let vb = match weight_source {
             WeightSource::Pytorch => VarBuilder::from_pth(&weights_filename, DTYPE, &device)
                 .map_err(NewEmbedderError::pytorch_weight)?,
@@ -244,7 +326,31 @@ impl Embedder {
 
         tracing::debug!(model = options.model, weight=?weight_source, pooling=?pooling, "model config");
 
-        let model = BertModel::load(vb, &config).map_err(NewEmbedderError::load_model)?;
+        // max length from config, fallback to 512
+        let max_len =
+            cfg_val.get("max_position_embeddings").and_then(|v| v.as_u64()).unwrap_or(512) as usize;
+
+        let model = if is_modern {
+            let config: ModernConfig = serde_json::from_str(&config_str).map_err(|inner| {
+                NewEmbedderError::deserialize_config(
+                    options.model.clone(),
+                    config_str.clone(),
+                    config_filename.clone(),
+                    inner,
+                )
+            })?;
+            ModelKind::Modern(ModernBert::load(vb, &config).map_err(NewEmbedderError::load_model)?)
+        } else {
+            let config: BertConfig = serde_json::from_str(&config_str).map_err(|inner| {
+                NewEmbedderError::deserialize_config(
+                    options.model.clone(),
+                    config_str.clone(),
+                    config_filename.clone(),
+                    inner,
+                )
+            })?;
+            ModelKind::Bert(BertModel::load(vb, &config).map_err(NewEmbedderError::load_model)?)
+        };
 
         if let Some(pp) = tokenizer.get_padding_mut() {
             pp.strategy = tokenizers::PaddingStrategy::BatchLongest
@@ -263,6 +369,8 @@ impl Embedder {
             dimensions: 0,
             pooling,
             cache: EmbeddingCache::new(cache_cap),
+            device,
+            max_len,
         };
 
         let embeddings = this
@@ -321,15 +429,29 @@ impl Embedder {
     pub fn embed_one(&self, text: &str) -> std::result::Result<Embedding, EmbedError> {
         let tokens = self.tokenizer.encode(text, true).map_err(EmbedError::tokenize)?;
         let token_ids = tokens.get_ids();
-        let token_ids = if token_ids.len() > 512 { &token_ids[..512] } else { token_ids };
+
         let token_ids =
-            Tensor::new(token_ids, &self.model.device).map_err(EmbedError::tensor_shape)?;
+            if token_ids.len() > self.max_len { &token_ids[..self.max_len] } else { token_ids };
+        let token_ids = Tensor::new(token_ids, &self.device).map_err(EmbedError::tensor_shape)?;
         let token_ids = Tensor::stack(&[token_ids], 0).map_err(EmbedError::tensor_shape)?;
-        let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
-        let embeddings = self
-            .model
-            .forward(&token_ids, &token_type_ids, None)
-            .map_err(EmbedError::model_forward)?;
+
+        let embeddings = match &self.model {
+            ModelKind::Bert(model) => {
+                let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
+                model
+                    .forward(&token_ids, &token_type_ids, None)
+                    .map_err(EmbedError::model_forward)?
+            }
+            ModelKind::Modern(model) => {
+                let mut mask_vec = tokens.get_attention_mask().to_vec();
+                if mask_vec.len() > self.max_len {
+                    mask_vec.truncate(self.max_len);
+                }
+                let mask = Tensor::new(mask_vec.as_slice(), &self.device)
+                    .map_err(EmbedError::tensor_shape)?;
+                let mask = Tensor::stack(&[mask], 0).map_err(EmbedError::tensor_shape)?;
+                model.forward(&token_ids, &mask).map_err(EmbedError::model_forward)?
+            }
+        };
 
         let embedding = Self::pooling(embeddings, self.pooling)?;
 

@@ -91,6 +91,7 @@ struct EmbedderData {
     request: RequestData,
     response: Response,
     configuration_source: ConfigurationSource,
+    max_retry_duration: std::time::Duration,
 }
 
 #[derive(Debug)]
@@ -182,10 +183,15 @@ impl Embedder {
     ) -> Result<Self, NewEmbedderError> {
         let bearer = options.api_key.as_deref().map(|api_key| format!("Bearer {api_key}"));
 
+        let timeout = std::env::var("MEILI_EXPERIMENTAL_REST_EMBEDDER_TIMEOUT_SECONDS")
+            .ok()
+            .map(|p| p.parse().unwrap())
+            .unwrap_or(30);
+
         let client = ureq::AgentBuilder::new()
             .max_idle_connections(REQUEST_PARALLELISM * 2)
             .max_idle_connections_per_host(REQUEST_PARALLELISM * 2)
-            .timeout(std::time::Duration::from_secs(30))
+            .timeout(std::time::Duration::from_secs(timeout))
             .build();
 
         let request = RequestData::new(
@@ -196,6 +202,14 @@ impl Embedder {
 
         let response = Response::new(options.response, &request)?;
 
+        let max_retry_duration =
+            std::env::var("MEILI_EXPERIMENTAL_REST_EMBEDDER_MAX_RETRY_DURATION_SECONDS")
+                .ok()
+                .map(|p| p.parse().unwrap())
+                .unwrap_or(60);
+
+        let max_retry_duration = std::time::Duration::from_secs(max_retry_duration);
+
         let data = EmbedderData {
             client,
             bearer,
@@ -204,6 +218,7 @@ impl Embedder {
             response,
             configuration_source,
             headers: options.headers,
+            max_retry_duration,
         };
 
         let dimensions = if let Some(dimensions) = options.dimensions {
@@ -457,7 +472,7 @@ where
         }
     }?;
 
-    let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
+    let retry_duration = retry_duration.min(data.max_retry_duration); // don't wait more than the max duration
 
     // randomly up to double the retry duration
     let retry_duration = retry_duration
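
Both REST embedder limits now come from environment variables, with the previous constants as fallbacks (30 s request timeout, 60 s retry cap). A minimal sketch of the same lookup pattern, using a hypothetical helper name that is not part of the changeset; note that a non-numeric value panics on `unwrap()`, exactly as in the code above:

    // Hypothetical helper mirroring the lookups in `Embedder::new`.
    fn seconds_from_env(var: &str, default: u64) -> std::time::Duration {
        let secs = std::env::var(var).ok().map(|p| p.parse().unwrap()).unwrap_or(default);
        std::time::Duration::from_secs(secs)
    }

    let timeout = seconds_from_env("MEILI_EXPERIMENTAL_REST_EMBEDDER_TIMEOUT_SECONDS", 30);
    let max_retry = seconds_from_env("MEILI_EXPERIMENTAL_REST_EMBEDDER_MAX_RETRY_DURATION_SECONDS", 60);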
@@ -550,9 +550,9 @@ pub struct DeserializePoolingConfig {
 #[derive(Debug, thiserror::Error)]
 #[error("model `{model_name}` appears to be unsupported{}\n - inner error: {inner}",
     if architectures.is_empty() {
-        "\n - Note: only models with architecture \"BertModel\" are supported.".to_string()
+        "\n - Note: only models with architecture \"BertModel\" or \"ModernBert\" are supported.".to_string()
     } else {
-        format!("\n - Note: model has declared architectures `{architectures:?}`, only models with architecture `\"BertModel\"` are supported.")
+        format!("\n - Note: model has declared architectures `{architectures:?}`, only models with architecture `\"BertModel\"` or `\"ModernBert\"` are supported.")
     })]
 pub struct UnsupportedModel {
     pub model_name: String,
@@ -44,6 +44,7 @@ pub struct EmbedSession<'doc, C, I> {
     embedder_name: &'doc str,
 
     embedder_stats: &'doc EmbedderStats,
+    ignore_embedding_failures: bool,
 
     on_embed: C,
 }
@@ -87,6 +88,7 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
         threads: &'doc ThreadPoolNoAbort,
         doc_alloc: &'doc Bump,
         embedder_stats: &'doc EmbedderStats,
+        ignore_embedding_failures: bool,
         on_embed: C,
     ) -> Self {
         let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
@@ -99,6 +101,7 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
             threads,
             embedder_name,
             embedder_stats,
+            ignore_embedding_failures,
             on_embed,
         }
     }
@@ -109,13 +112,12 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
         rendered: I,
         unused_vectors_distribution: &C::ErrorMetadata,
     ) -> Result<()> {
-        if self.inputs.len() < self.inputs.capacity() {
-            self.inputs.push(rendered);
-            self.metadata.push(metadata);
-            return Ok(());
+        if self.inputs.len() >= self.inputs.capacity() {
+            self.embed_chunks(unused_vectors_distribution)?;
         }
-
-        self.embed_chunks(unused_vectors_distribution)
+        self.inputs.push(rendered);
+        self.metadata.push(metadata);
+        Ok(())
     }
 
     pub fn drain(mut self, unused_vectors_distribution: &C::ErrorMetadata) -> Result<C> {
@@ -144,24 +146,33 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
                 Ok(())
             }
             Err(error) => {
-                // reset metadata and inputs, and send metadata to the error processing.
+                // send metadata to the error processing.
                 let doc_alloc = self.metadata.bump();
                 let metadata = std::mem::replace(
                     &mut self.metadata,
                     BVec::with_capacity_in(self.inputs.capacity(), doc_alloc),
                 );
-                self.inputs.clear();
-                return Err(self.on_embed.process_embedding_error(
+                Err(self.on_embed.process_embedding_error(
                     error,
                     self.embedder_name,
                     unused_vectors_distribution,
                     metadata,
-                ));
+                ))
             }
         };
         self.inputs.clear();
         self.metadata.clear();
-        res
+        if self.ignore_embedding_failures {
+            if let Err(err) = res {
+                tracing::warn!(
+                    %err,
+                    "ignored error embedding batch of documents due to failure policy"
+                );
+            }
+            Ok(())
+        } else {
+            res
+        }
     }
 
     pub(crate) fn embedder_name(&self) -> &'doc str {
@@ -27,10 +27,10 @@ const HANNOY_M0: usize = 32;
     utoipa::ToSchema,
 )]
 pub enum VectorStoreBackend {
-    #[default]
     #[deserr(rename = "stable")]
     #[serde(rename = "stable")]
     Arroy,
+    #[default]
     #[deserr(rename = "experimental")]
     #[serde(rename = "experimental")]
     Hannoy,