Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-12-08 21:55:42 +00:00
Compare commits: refactor-s...update-yau (18 commits)

bb590c20aa
93f5defedc
33241a6b12
ff87b4db26
ba9fadc8f1
d29d4f88da
17c5ceeb9d
c32d746069
b9a0ff0dd6
75496af985
0e9eb9eedb
3a78e988da
d9e5074189
bc210bdc00
4bf83f701c
db3887929f
9af103a88e
99211eb375
Cargo.lock (generated): 317 changed lines
@@ -36,9 +36,9 @@ dependencies = [

[[package]]
name = "actix-http"
version = "3.7.0"
version = "3.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eb9843d84c775696c37d9a418bbb01b932629d01870722c0f13eb3f95e2536d"
checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743"
dependencies = [
 "actix-codec",
 "actix-rt",
@@ -46,7 +46,7 @@ dependencies = [
 "actix-tls",
 "actix-utils",
 "ahash",
 "base64 0.22.1",
 "base64 0.21.7",
 "bitflags 2.5.0",
 "brotli",
 "bytes",
@@ -85,15 +85,13 @@ dependencies = [

[[package]]
name = "actix-router"
version = "0.5.3"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8"
checksum = "d66ff4d247d2b160861fa2866457e85706833527840e4133f8f49aa423a38799"
dependencies = [
 "bytestring",
 "cfg-if",
 "http 0.2.11",
 "regex",
 "regex-lite",
 "serde",
 "tracing",
]

@@ -140,9 +138,9 @@ dependencies = [

[[package]]
name = "actix-tls"
version = "3.4.0"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac453898d866cdbecdbc2334fe1738c747b4eba14a677261f2b768ba05329389"
checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f"
dependencies = [
 "actix-rt",
 "actix-service",
@@ -169,9 +167,9 @@ dependencies = [

[[package]]
name = "actix-web"
version = "4.6.0"
version = "4.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1cf67dadb19d7c95e5a299e2dda24193b89d5d4f33a3b9800888ede9e19aa32"
checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984"
dependencies = [
 "actix-codec",
 "actix-http",
@@ -198,7 +196,7 @@ dependencies = [
 "mime",
 "once_cell",
 "pin-project-lite",
 "regex-lite",
 "regex",
 "serde",
 "serde_json",
 "serde_urlencoded",
@@ -222,9 +220,8 @@ dependencies = [

[[package]]
name = "actix-web-static-files"
version = "4.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adf6d1ef6d7a60e084f9e0595e2a5234abda14e76c105ecf8e2d0e8800c41a1f"
version = "3.0.5"
source = "git+https://github.com/kilork/actix-web-static-files.git?rev=2d3b6160#2d3b6160f0de4ba061c5d76b5704f34fb677f6df"
dependencies = [
 "actix-web",
 "derive_more",
@@ -616,9 +613,9 @@ dependencies = [

[[package]]
name = "brotli"
version = "6.0.0"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b"
checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f"
dependencies = [
 "alloc-no-stdlib",
 "alloc-stdlib",
@@ -627,9 +624,9 @@ dependencies = [

[[package]]
name = "brotli-decompressor"
version = "4.0.1"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362"
checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
dependencies = [
 "alloc-no-stdlib",
 "alloc-stdlib",
@@ -898,9 +895,9 @@ dependencies = [

[[package]]
name = "charabia"
version = "0.8.11"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11a09ae38cfcc153f01576c3f579dfd916e0320f1b474f298c8d680b2dd92eb6"
checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4"
dependencies = [
 "aho-corasick",
 "cow-utils",
@@ -989,7 +986,7 @@ dependencies = [
 "anstream",
 "anstyle",
 "clap_lex",
 "strsim 0.10.0",
 "strsim",
]

[[package]]
@@ -1280,12 +1277,12 @@ dependencies = [

[[package]]
name = "darling"
version = "0.20.9"
version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1"
checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e"
dependencies = [
 "darling_core 0.20.9",
 "darling_macro 0.20.9",
 "darling_core 0.20.3",
 "darling_macro 0.20.3",
]

[[package]]
@@ -1298,21 +1295,21 @@ dependencies = [
 "ident_case",
 "proc-macro2",
 "quote",
 "strsim 0.10.0",
 "strsim",
 "syn 1.0.109",
]

[[package]]
name = "darling_core"
version = "0.20.9"
version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120"
checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621"
dependencies = [
 "fnv",
 "ident_case",
 "proc-macro2",
 "quote",
 "strsim 0.11.1",
 "strsim",
 "syn 2.0.60",
]

@@ -1329,11 +1326,11 @@ dependencies = [

[[package]]
name = "darling_macro"
version = "0.20.9"
version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178"
checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
dependencies = [
 "darling_core 0.20.9",
 "darling_core 0.20.3",
 "quote",
 "syn 2.0.60",
]

@@ -1386,15 +1383,6 @@ dependencies = [
 "derive_builder_macro 0.13.1",
]

[[package]]
name = "derive_builder"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
dependencies = [
 "derive_builder_macro 0.20.0",
]

[[package]]
name = "derive_builder_core"
version = "0.12.0"
@@ -1419,18 +1407,6 @@ dependencies = [
 "syn 1.0.109",
]

[[package]]
name = "derive_builder_core"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
dependencies = [
 "darling 0.20.9",
 "proc-macro2",
 "quote",
 "syn 2.0.60",
]

[[package]]
name = "derive_builder_macro"
version = "0.12.0"
@@ -1451,16 +1427,6 @@ dependencies = [
 "syn 1.0.109",
]

[[package]]
name = "derive_builder_macro"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
dependencies = [
 "derive_builder_core 0.20.0",
 "syn 2.0.60",
]

[[package]]
name = "derive_more"
version = "0.99.17"
@@ -1488,7 +1454,7 @@ dependencies = [
 "serde-cs",
 "serde_json",
 "serde_urlencoded",
 "strsim 0.10.0",
 "strsim",
]

[[package]]
@@ -1741,6 +1707,29 @@ dependencies = [
 "syn 2.0.60",
]

[[package]]
name = "env_filter"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
dependencies = [
 "log",
 "regex",
]

[[package]]
name = "env_logger"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
dependencies = [
 "anstream",
 "anstyle",
 "env_filter",
 "humantime",
 "log",
]

[[package]]
name = "equivalent"
version = "1.0.1"
@@ -1795,7 +1784,7 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5"
dependencies = [
 "darling 0.20.9",
 "darling 0.20.3",
 "proc-macro2",
 "quote",
 "syn 2.0.60",
@@ -2390,6 +2379,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"

[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"

[[package]]
name = "hyper"
version = "0.14.27"
@@ -2783,9 +2778,9 @@ dependencies = [

[[package]]
name = "lindera"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcd4fa369654517f72c10b24adf03ad4ce69d19facb79c3cb3cf9b4580ac352f"
checksum = "a1bbf252ea3490053dc397539ece0b510924f2f72605fa28d3e858d86f43ec88"
dependencies = [
 "lindera-analyzer",
 "lindera-core",
@@ -2796,9 +2791,9 @@ dependencies = [

[[package]]
name = "lindera-analyzer"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2cba7fe275cb8ec4c594cfee9cc39e48b71e02a089457d52f3e70dc146a8133"
checksum = "87febfec0e2859ce2154fb90dd6f66b774ddb0b6e264b44f8e3d1303c9dcedd7"
dependencies = [
 "anyhow",
 "bincode",
@@ -2826,9 +2821,9 @@ dependencies = [

[[package]]
name = "lindera-cc-cedict"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240adf9faba3f09ad16557aefcd316dd00ebb940ac94334a629660d772f118c1"
checksum = "fcb91bb8a93ab0f95dbc3c43b5105354bb059134ef731154f75a64b5d919e71d"
dependencies = [
 "bincode",
 "byteorder",
@@ -2840,21 +2835,29 @@ dependencies = [

[[package]]
name = "lindera-cc-cedict-builder"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f12241f9e74babe708a0b9441d9f3fa67cb29fd01257918f30ffd480ca568820"
checksum = "f6022a8309a287dbef425fd09a61585351670c83001d74f6c089979e2330b683"
dependencies = [
 "anyhow",
 "bincode",
 "byteorder",
 "csv",
 "encoding",
 "env_logger",
 "glob",
 "lindera-compress",
 "lindera-core",
 "lindera-decompress",
 "lindera-dictionary-builder",
 "log",
 "yada",
]

[[package]]
name = "lindera-compress"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50f9f7a858d70ff9e4383cbd507ca9e98c8faf0319e08c10df4c30cb58c9ca6c"
checksum = "32363cbcf433f915e7d77c2a0c410db2d6b23442e80715cf2cf6b9864078a500"
dependencies = [
 "anyhow",
 "flate2",
@@ -2863,9 +2866,9 @@ dependencies = [

[[package]]
name = "lindera-core"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f09810ab98ce2a084d788ac38fbb7b31697f34bc47c61de0d880320a674bd15"
checksum = "d9a0e858753a02b1a3524fae4fbb11ca4b3a947128fd7854b797386562678be8"
dependencies = [
 "anyhow",
 "bincode",
@@ -2880,9 +2883,9 @@ dependencies = [

[[package]]
name = "lindera-decompress"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d53400c9b2dd6b45f82d9fa5b5efe079f3acaf6ce609dba8d42c8a76baaa2b12"
checksum = "0e406345f6f8b665b9a129c67079c18ca9d97e9d171d102b4106a64a592c285e"
dependencies = [
 "anyhow",
 "flate2",
@@ -2891,9 +2894,9 @@ dependencies = [

[[package]]
name = "lindera-dictionary"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2053d064a515839250438b8dfa6cf445e2b97633232ded34a54f267e945d196e"
checksum = "3e2a3ec0e5fd6768a27c6ec1040e8470d3a5926418f7afe065859e98aabb3bfe"
dependencies = [
 "anyhow",
 "bincode",
@@ -2914,33 +2917,11 @@ dependencies = [
 "strum_macros",
]

[[package]]
name = "lindera-dictionary-builder"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14f486924055f8bedcc5877572e4dc91fbc10370862430ac2e5f7f0d671a18c8"
dependencies = [
 "anyhow",
 "bincode",
 "byteorder",
 "csv",
 "derive_builder 0.20.0",
 "encoding",
 "encoding_rs",
 "encoding_rs_io",
 "glob",
 "lindera-compress",
 "lindera-core",
 "lindera-decompress",
 "log",
 "yada",
]

[[package]]
name = "lindera-filter"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb3904fc279f0297f6fd6210435adab1f8c82ba84eba8635407c791af51c0d8a"
checksum = "1badaf51bad051185ea4917ba91bbbf2d6f8167e155647e21e0eaaef0982a95d"
dependencies = [
 "anyhow",
 "csv",
@@ -2963,9 +2944,9 @@ dependencies = [

[[package]]
name = "lindera-ipadic"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4aa3ef2f1f6838b0fa2e2fca2896242bb83bc877c1760cdb6fa23449ab95d664"
checksum = "129ec16366354998f9791467ad38731539197747f649e573ead845358271ce25"
dependencies = [
 "bincode",
 "byteorder",
@@ -2977,21 +2958,31 @@ dependencies = [

[[package]]
name = "lindera-ipadic-builder"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a41287db18eadb58d73a04d49778d41c161549fbbbe155d4338976b7b8541c7d"
checksum = "7f0979a56bc57e9c9be2996dff232c47aa146a2e7baebf5dd567e388eba3dd90"
dependencies = [
 "anyhow",
 "bincode",
 "byteorder",
 "csv",
 "encoding_rs",
 "encoding_rs_io",
 "env_logger",
 "glob",
 "lindera-compress",
 "lindera-core",
 "lindera-decompress",
 "lindera-dictionary-builder",
 "log",
 "serde",
 "yada",
]

[[package]]
name = "lindera-ipadic-neologd"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49382256f245078400bf7e72663f9eb30afcd9ed54cd46f29d7db1be529678e1"
checksum = "20076660c4e79ef0316735b44e18ec7644e54786acdee8946c972d5f97086d0f"
dependencies = [
 "bincode",
 "byteorder",
@@ -3003,21 +2994,31 @@ dependencies = [

[[package]]
name = "lindera-ipadic-neologd-builder"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ae9cfd2fda68ef526ef0c7b50c5d4d5582a4daa6ecd0cea9e2b0b62564a2a5d"
checksum = "eccd18ed5f65d1d64ac0cbfa1d6827bfbbaf6530520ae6847e6a91ee38f47e20"
dependencies = [
 "anyhow",
 "bincode",
 "byteorder",
 "csv",
 "encoding_rs",
 "encoding_rs_io",
 "env_logger",
 "glob",
 "lindera-compress",
 "lindera-core",
 "lindera-decompress",
 "lindera-dictionary-builder",
 "log",
 "serde",
 "yada",
]

[[package]]
name = "lindera-ko-dic"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f86d03a863f3ae1d269e7b7d4dd2cce9385a53463479bafc5d7aa48719f36db"
checksum = "59073171566c3e498ca048e84c2d0a7e117a42f36c8eb7d7163e65ac38bd6d48"
dependencies = [
 "bincode",
 "byteorder",
@@ -3033,21 +3034,29 @@ dependencies = [

[[package]]
name = "lindera-ko-dic-builder"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd0f44f2e56358c5879dfb5e7f76cc6ba7853ec31082c4e3f8fb65fb2d849c51"
checksum = "ae176afa8535ca2a5ee9471873f85d531db0a6c32a3c42b41084506aac22b577"
dependencies = [
 "anyhow",
 "bincode",
 "byteorder",
 "csv",
 "encoding",
 "env_logger",
 "glob",
 "lindera-compress",
 "lindera-core",
 "lindera-decompress",
 "lindera-dictionary-builder",
 "log",
 "yada",
]

[[package]]
name = "lindera-tokenizer"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c5182735cdc2832ac757b31e8a5b150a3514357a30efe3dec212f8dcb06ba14"
checksum = "457285bdde84571aa510c9e05371904305a55e8a541fa1473d4393062f06932d"
dependencies = [
 "bincode",
 "lindera-core",
@@ -3059,9 +3068,9 @@ dependencies = [

[[package]]
name = "lindera-unidic"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c63da104728dd1cf14bfa564753cbfa996f6078ed2e23e31475bd1d639fc597"
checksum = "5839980be552dfa639b70964c61914a9ad014148663679b0e148aa72e5e30f23"
dependencies = [
 "bincode",
 "byteorder",
@@ -3077,14 +3086,22 @@ dependencies = [

[[package]]
name = "lindera-unidic-builder"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04acecbc068dac21766a1b7ed1f2608b6f250d10b4f8bff67abc2a00437a0974"
checksum = "dcaab8f061d5b944b1e424f49c7efbf8f276e8a72e4f4ff956d01e46d481f008"
dependencies = [
 "anyhow",
 "bincode",
 "byteorder",
 "csv",
 "encoding",
 "env_logger",
 "glob",
 "lindera-compress",
 "lindera-core",
 "lindera-decompress",
 "lindera-dictionary-builder",
 "log",
 "yada",
]

[[package]]
@@ -4323,12 +4340,6 @@ dependencies = [
 "regex-syntax",
]

[[package]]
name = "regex-lite"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"

[[package]]
name = "regex-syntax"
version = "0.8.2"
@@ -4377,6 +4388,12 @@ dependencies = [
 "winreg",
]

[[package]]
name = "retain_mut"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"

[[package]]
name = "ring"
version = "0.17.8"
@@ -4394,12 +4411,13 @@ dependencies = [

[[package]]
name = "roaring"
version = "0.10.5"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7699249cc2c7d71939f30868f47e9d7add0bdc030d90ee10bfd16887ff8bb1c8"
checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873"
dependencies = [
 "bytemuck",
 "byteorder",
 "retain_mut",
 "serde",
]

@@ -4882,12 +4900,6 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"

[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"

[[package]]
name = "strum"
version = "0.26.2"
@@ -5051,18 +5063,18 @@ dependencies = [

[[package]]
name = "thiserror"
version = "1.0.58"
version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
dependencies = [
 "thiserror-impl",
]

[[package]]
name = "thiserror-impl"
version = "1.0.58"
version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
dependencies = [
 "proc-macro2",
 "quote",
@@ -5301,9 +5313,9 @@ dependencies = [

[[package]]
name = "tracing-actix-web"
version = "0.7.10"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa069bd1503dd526ee793bb3fce408895136c95fc86d2edb2acf1c646d7f0684"
checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78"
dependencies = [
 "actix-web",
 "mutually_exclusive_features",
@@ -6078,12 +6090,13 @@ dependencies = [

[[package]]
name = "yaup"
version = "0.2.1"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a59e7d27bed43f7c37c25df5192ea9d435a8092a902e02203359ac9ce3e429d9"
checksum = "b0144f1a16a199846cb21024da74edd930b43443463292f536b7110b4855b5c6"
dependencies = [
 "form_urlencoded",
 "serde",
 "url",
 "thiserror",
]

[[package]]
@@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true

[dependencies]
actix-web = { version = "4.6.0", default-features = false }
actix-web = { version = "4.5.1", default-features = false }
anyhow = "1.0.79"
convert_case = "0.6.0"
csv = "1.3.0"
@@ -30,12 +30,7 @@ serde_json = "1.0.111"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
    "serde-well-known",
    "formatting",
    "parsing",
    "macros",
] }
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
@@ -189,6 +189,4 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarId);

@@ -241,8 +241,6 @@ InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
InvalidSimilarRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;

@@ -507,21 +505,6 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
    }
}

impl fmt::Display for deserr_codes::InvalidSearchRankingScoreThreshold {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`."
        )
    }
}

impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        deserr_codes::InvalidSearchRankingScoreThreshold.fmt(f)
    }
}

#[macro_export]
macro_rules! internal_error {
    ($target:ty : $($other:path), *) => {
@@ -14,20 +14,20 @@ default-run = "meilisearch"

[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.7.0", default-features = false, features = [
actix-http = { version = "3.6.0", default-features = false, features = [
    "compress-brotli",
    "compress-gzip",
    "rustls-0_21",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.6.0", default-features = false, features = [
actix-web = { version = "4.5.1", default-features = false, features = [
    "macros",
    "compress-brotli",
    "compress-gzip",
    "cookies",
    "rustls-0_21",
] }
actix-web-static-files = { version = "4.0.1", optional = true }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.77"
@@ -98,27 +98,26 @@ tokio-stream = "0.1.14"
toml = "0.8.8"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
walkdir = "2.4.0"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.10"
tracing-actix-web = "0.7.9"
build-info = { version = "1.7.0", path = "../build-info" }

[dev-dependencies]
actix-rt = "2.9.0"
assert-json-diff = "2.0.2"
brotli = "6.0.0"
brotli = "3.4.0"
insta = "1.34.0"
manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
yaup = "0.2.1"
yaup = "0.3.1"

[build-dependencies]
anyhow = { version = "1.0.79", optional = true }
@@ -648,7 +648,6 @@ pub struct SearchAggregator {
    // scoring
    show_ranking_score: bool,
    show_ranking_score_details: bool,
    ranking_score_threshold: bool,
}

impl SearchAggregator {
@@ -677,7 +676,6 @@ impl SearchAggregator {
            matching_strategy,
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
        } = query;

        let mut ret = Self::default();
@@ -750,7 +748,6 @@ impl SearchAggregator {

        ret.show_ranking_score = *show_ranking_score;
        ret.show_ranking_score_details = *show_ranking_score_details;
        ret.ranking_score_threshold = ranking_score_threshold.is_some();

        if let Some(hybrid) = hybrid {
            ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
@@ -824,7 +821,6 @@ impl SearchAggregator {
            hybrid,
            total_degraded,
            total_used_negative_operator,
            ranking_score_threshold,
        } = other;

        if self.timestamp.is_none() {
@@ -908,7 +904,6 @@ impl SearchAggregator {
        // scoring
        self.show_ranking_score |= show_ranking_score;
        self.show_ranking_score_details |= show_ranking_score_details;
        self.ranking_score_threshold |= ranking_score_threshold;
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -950,7 +945,6 @@ impl SearchAggregator {
            hybrid,
            total_degraded,
            total_used_negative_operator,
            ranking_score_threshold,
        } = self;

        if total_received == 0 {
@@ -1021,7 +1015,6 @@ impl SearchAggregator {
            "scoring": {
                "show_ranking_score": show_ranking_score,
                "show_ranking_score_details": show_ranking_score_details,
                "ranking_score_threshold": ranking_score_threshold,
            },
        });

@@ -1094,7 +1087,6 @@ impl MultiSearchAggregator {
            matching_strategy: _,
            attributes_to_search_on: _,
            hybrid: _,
            ranking_score_threshold: _,
        } = query;

        index_uid.as_str()
@@ -1242,7 +1234,6 @@ impl FacetSearchAggregator {
            matching_strategy,
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
        } = query;

        let mut ret = Self::default();
@@ -1257,8 +1248,7 @@ impl FacetSearchAggregator {
            || filter.is_some()
            || *matching_strategy != MatchingStrategy::default()
            || attributes_to_search_on.is_some()
            || hybrid.is_some()
            || ranking_score_threshold.is_some();
            || hybrid.is_some();

        ret
    }
@@ -1634,7 +1624,6 @@ pub struct SimilarAggregator {
    // scoring
    show_ranking_score: bool,
    show_ranking_score_details: bool,
    ranking_score_threshold: bool,
}

impl SimilarAggregator {
@@ -1649,7 +1638,6 @@ impl SimilarAggregator {
            show_ranking_score,
            show_ranking_score_details,
            filter,
            ranking_score_threshold,
        } = query;

        let mut ret = Self::default();
@@ -1687,7 +1675,6 @@ impl SimilarAggregator {

        ret.show_ranking_score = *show_ranking_score;
        ret.show_ranking_score_details = *show_ranking_score_details;
        ret.ranking_score_threshold = ranking_score_threshold.is_some();

        ret.embedder = embedder.is_some();

@@ -1721,7 +1708,6 @@ impl SimilarAggregator {
            show_ranking_score,
            show_ranking_score_details,
            embedder,
            ranking_score_threshold,
        } = other;

        if self.timestamp.is_none() {
@@ -1763,7 +1749,6 @@ impl SimilarAggregator {
        // scoring
        self.show_ranking_score |= show_ranking_score;
        self.show_ranking_score_details |= show_ranking_score_details;
        self.ranking_score_threshold |= ranking_score_threshold;
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -1784,7 +1769,6 @@ impl SimilarAggregator {
            show_ranking_score,
            show_ranking_score_details,
            embedder,
            ranking_score_threshold,
        } = self;

        if total_received == 0 {
@@ -1824,7 +1808,6 @@ impl SimilarAggregator {
            "scoring": {
                "show_ranking_score": show_ranking_score,
                "show_ranking_score_details": show_ranking_score_details,
                "ranking_score_threshold": ranking_score_threshold,
            },
        });
@@ -14,8 +14,8 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::indexes::search::search_kind;
use crate::search::{
    add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
    SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
    add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::search_queue::SearchQueue;
@@ -46,8 +46,6 @@ pub struct FacetSearchQuery {
    pub matching_strategy: MatchingStrategy,
    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
    pub attributes_to_search_on: Option<Vec<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
    pub ranking_score_threshold: Option<RankingScoreThreshold>,
}

pub async fn search(
@@ -105,7 +103,6 @@ impl From<FacetSearchQuery> for SearchQuery {
            matching_strategy,
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
        } = value;

        SearchQuery {
@@ -131,7 +128,6 @@ impl From<FacetSearchQuery> for SearchQuery {
            vector,
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
        }
    }
}
@@ -19,10 +19,9 @@ use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::search::{
    add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
    SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
    DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
    add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery,
    SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;

@@ -83,21 +82,6 @@ pub struct SearchQueryGet {
    pub hybrid_embedder: Option<String>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
    pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
    pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
}

#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThreshold);

impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
    type Error = InvalidSearchRankingScoreThreshold;

    fn try_from(s: String) -> Result<Self, Self::Error> {
        let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?;
        Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?))
    }
}

#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
@@ -168,7 +152,6 @@ impl From<SearchQueryGet> for SearchQuery {
            matching_strategy: other.matching_strategy,
            attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
            hybrid,
            ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
        }
    }
}
@@ -6,8 +6,8 @@ use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::{
    InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId,
    InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarRankingScoreThreshold,
    InvalidSimilarShowRankingScore, InvalidSimilarShowRankingScoreDetails,
    InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarShowRankingScore,
    InvalidSimilarShowRankingScoreDetails,
};
use meilisearch_types::error::{ErrorCode as _, ResponseError};
use meilisearch_types::index_uid::IndexUid;
@@ -21,8 +21,8 @@ use crate::analytics::{Analytics, SimilarAggregator};
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
    add_search_rules, perform_similar, RankingScoreThresholdSimilar, SearchKind, SimilarQuery,
    SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
    add_search_rules, perform_similar, SearchKind, SimilarQuery, SimilarResult,
    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};

pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -42,7 +42,9 @@ pub async fn similar_get(
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let query = params.0.try_into()?;
    let query = params.0.try_into().map_err(|code: InvalidSimilarId| {
        ResponseError::from_msg(code.to_string(), code.error_code())
    })?;

    let mut aggregate = SimilarAggregator::from_query(&query, &req);

@@ -128,27 +130,12 @@ pub struct SimilarQueryGet {
    show_ranking_score: Param<bool>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
    show_ranking_score_details: Param<bool>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
    pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
    #[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
    pub embedder: Option<String>,
}

#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);

impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
    type Error = InvalidSimilarRankingScoreThreshold;

    fn try_from(s: String) -> Result<Self, Self::Error> {
        let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
        Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?))
    }
}

impl TryFrom<SimilarQueryGet> for SimilarQuery {
    type Error = ResponseError;
    type Error = InvalidSimilarId;

    fn try_from(
        SimilarQueryGet {
@@ -160,7 +147,6 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
            show_ranking_score,
            show_ranking_score_details,
            embedder,
            ranking_score_threshold,
        }: SimilarQueryGet,
    ) -> Result<Self, Self::Error> {
        let filter = match filter {
@@ -172,9 +158,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
        };

        Ok(SimilarQuery {
            id: id.0.try_into().map_err(|code: InvalidSimilarId| {
                ResponseError::from_msg(code.to_string(), code.error_code())
            })?,
            id: id.0.try_into()?,
            offset: offset.0,
            limit: limit.0,
            filter,
@@ -182,7 +166,6 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
            attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
            show_ranking_score: show_ranking_score.0,
            show_ranking_score_details: show_ranking_score_details.0,
            ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
        })
    }
}
@@ -87,44 +87,6 @@ pub struct SearchQuery {
    pub matching_strategy: MatchingStrategy,
    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
    pub attributes_to_search_on: Option<Vec<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
    pub ranking_score_threshold: Option<RankingScoreThreshold>,
}

#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThreshold(f64);

impl std::convert::TryFrom<f64> for RankingScoreThreshold {
    type Error = InvalidSearchRankingScoreThreshold;

    fn try_from(f: f64) -> Result<Self, Self::Error> {
        // the suggested "fix" is: `!(0.0..=1.0).contains(&f)` which is allegedly less readable
        #[allow(clippy::manual_range_contains)]
        if f > 1.0 || f < 0.0 {
            Err(InvalidSearchRankingScoreThreshold)
        } else {
            Ok(RankingScoreThreshold(f))
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdSimilar(f64);

impl std::convert::TryFrom<f64> for RankingScoreThresholdSimilar {
    type Error = InvalidSimilarRankingScoreThreshold;

    fn try_from(f: f64) -> Result<Self, Self::Error> {
        // the suggested "fix" is: `!(0.0..=1.0).contains(&f)` which is allegedly less readable
        #[allow(clippy::manual_range_contains)]
        if f > 1.0 || f < 0.0 {
            Err(InvalidSimilarRankingScoreThreshold)
        } else {
            Ok(Self(f))
        }
    }
}
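
Note: the two `TryFrom<f64>` impls above keep the explicit comparisons and silence clippy's `manual_range_contains` lint. For reference, a minimal standalone sketch of the lint-preferred form the comment refers to; the `Threshold` and `InvalidThreshold` names here are hypothetical, for illustration only:

// Hypothetical sketch; not part of the diff above.
#[derive(Debug)]
pub struct InvalidThreshold;

#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Threshold(f64);

impl TryFrom<f64> for Threshold {
    type Error = InvalidThreshold;

    fn try_from(f: f64) -> Result<Self, Self::Error> {
        // clippy's suggestion: one RangeInclusive check instead of two comparisons
        if (0.0..=1.0).contains(&f) {
            Ok(Threshold(f))
        } else {
            Err(InvalidThreshold)
        }
    }
}

One subtlety worth knowing: the two forms are not equivalent for NaN. `f > 1.0 || f < 0.0` is false for NaN, so the explicit version accepts it, while `(0.0..=1.0).contains(&f)` is false for NaN and rejects it.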
// Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum.
@@ -155,7 +117,6 @@ impl fmt::Debug for SearchQuery {
            crop_marker,
            matching_strategy,
            attributes_to_search_on,
            ranking_score_threshold,
        } = self;

        let mut debug = f.debug_struct("SearchQuery");
@@ -227,9 +188,6 @@ impl fmt::Debug for SearchQuery {
        debug.field("highlight_pre_tag", &highlight_pre_tag);
        debug.field("highlight_post_tag", &highlight_post_tag);
        debug.field("crop_marker", &crop_marker);
        if let Some(ranking_score_threshold) = ranking_score_threshold {
            debug.field("ranking_score_threshold", &ranking_score_threshold);
        }

        debug.finish()
    }
@@ -398,8 +356,6 @@ pub struct SearchQueryWithIndex {
    pub matching_strategy: MatchingStrategy,
    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
    pub attributes_to_search_on: Option<Vec<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
    pub ranking_score_threshold: Option<RankingScoreThreshold>,
}

impl SearchQueryWithIndex {
@@ -428,7 +384,6 @@ impl SearchQueryWithIndex {
            matching_strategy,
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
        } = self;
        (
            index_uid,
@@ -455,7 +410,6 @@ impl SearchQueryWithIndex {
                matching_strategy,
                attributes_to_search_on,
                hybrid,
                ranking_score_threshold,
                // do not use ..Default::default() here,
                // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
            },
@@ -482,8 +436,6 @@ pub struct SimilarQuery {
    pub show_ranking_score: bool,
    #[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
    pub show_ranking_score_details: bool,
    #[deserr(default, error = DeserrJsonError<InvalidSimilarRankingScoreThreshold>, default)]
    pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
}

#[derive(Debug, Clone, PartialEq, Deserr)]
@@ -712,9 +664,6 @@ fn prepare_search<'t>(
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
    let mut search = index.search(rtxn);
    search.time_budget(time_budget);
    if let Some(ranking_score_threshold) = query.ranking_score_threshold {
        search.ranking_score_threshold(ranking_score_threshold.0);
    }

    match search_kind {
        SearchKind::KeywordOnly => {
@@ -756,16 +705,11 @@
        .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

    search.exhaustive_number_hits(is_finite_pagination);
    search.scoring_strategy(
        if query.show_ranking_score
            || query.show_ranking_score_details
            || query.ranking_score_threshold.is_some()
        {
            ScoringStrategy::Detailed
        } else {
            ScoringStrategy::Skip
        },
    );
    search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
        ScoringStrategy::Detailed
    } else {
        ScoringStrategy::Skip
    });
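
Note the behavioral point in this hunk: the longer variant forces score computation whenever a ranking-score threshold is present, even when the caller never asks for scores to be shown, because filtering on a score requires that score to exist. A minimal sketch of that selection logic, with hypothetical free-standing names standing in for the query fields:

// Hypothetical sketch of the threshold-aware strategy choice.
pub enum ScoringStrategy {
    Detailed, // compute ranking scores for every candidate document
    Skip,     // skip score computation entirely
}

pub fn pick_strategy(
    show_ranking_score: bool,
    show_ranking_score_details: bool,
    ranking_score_threshold: Option<f64>,
) -> ScoringStrategy {
    // A threshold implies scoring even when scores are not returned to the client.
    if show_ranking_score || show_ranking_score_details || ranking_score_threshold.is_some() {
        ScoringStrategy::Detailed
    } else {
        ScoringStrategy::Skip
    }
}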

    // compute the offset on the limit depending on the pagination mode.
    let (offset, limit) = if is_finite_pagination {
@@ -843,6 +787,10 @@ pub fn perform_search(

    let SearchQuery {
        q,
        vector: _,
        hybrid: _,
        // already computed from prepare_search
        offset: _,
        limit,
        page,
        hits_per_page,
@@ -853,19 +801,14 @@
        show_matches_position,
        show_ranking_score,
        show_ranking_score_details,
        filter: _,
        sort,
        facets,
        highlight_pre_tag,
        highlight_post_tag,
        crop_marker,
        // already used in prepare_search
        vector: _,
        hybrid: _,
        offset: _,
        ranking_score_threshold: _,
        matching_strategy: _,
        attributes_to_search_on: _,
        filter: _,
    } = query;

    let format = AttributesFormat {
@@ -1127,7 +1070,6 @@ pub fn perform_similar(
        attributes_to_retrieve,
        show_ranking_score,
        show_ranking_score_details,
        ranking_score_threshold,
    } = query;

    // using let-else rather than `?` so that the borrow checker identifies we're always returning here,
@@ -1151,10 +1093,6 @@
        }
    }

    if let Some(ranking_score_threshold) = ranking_score_threshold {
        similar.ranking_score_threshold(ranking_score_threshold.0);
    }

    let milli::SearchResult {
        documents_ids,
        matching_words: _,
@@ -40,9 +40,8 @@ pub struct Permit {

impl Drop for Permit {
    fn drop(&mut self) {
        let sender = self.sender.clone();
        // if the channel is closed then the whole instance is down
        std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
        let _ = futures::executor::block_on(self.sender.send(()));
    }
}
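
The two `Drop` bodies in this hunk return the permit to the queue in different ways: one spawns a Tokio task to send on the channel (fire-and-forget, which requires a live runtime and a cloned sender), the other blocks the current thread on the send via `futures::executor::block_on`. A minimal sketch of both shapes, assuming a `tokio::sync::mpsc` channel:

// Hedged sketch; `Permit` mirrors the struct in the hunk above.
use tokio::sync::mpsc::Sender;

pub struct Permit {
    sender: Sender<()>,
}

impl Drop for Permit {
    fn drop(&mut self) {
        // Shape 1: fire-and-forget. Must be dropped on a Tokio runtime thread,
        // and the send may still be in flight when `drop` returns.
        let sender = self.sender.clone();
        std::mem::drop(tokio::spawn(async move { sender.send(()).await }));

        // Shape 2 (the alternative in the hunk): block until the message is
        // sent; works off-runtime but stalls the dropping thread.
        // let _ = futures::executor::block_on(self.sender.send(()));
    }
}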

@@ -376,7 +376,7 @@ impl Index<'_> {
    }

    pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
        let url = format!("/indexes/{}/search?{}", urlencode(self.uid.as_ref()), query);
        let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query);
        self.service.get(url).await
    }

@@ -413,7 +413,7 @@ impl Index<'_> {
    }

    pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) {
        let url = format!("/indexes/{}/similar?{}", urlencode(self.uid.as_ref()), query);
        let url = format!("/indexes/{}/similar{}", urlencode(self.uid.as_ref()), query);
        self.service.get(url).await
    }
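
With this change the helpers no longer insert the `?` themselves, so callers must pass the complete query string, leading separator included (an empty string now yields a bare `/search` or `/similar` URL). The test updates that follow apply exactly this pattern, e.g.:

let (response, code) = index.search_get("?offset=doggo").await;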

@@ -71,7 +71,7 @@ async fn search_bad_offset() {
    }
    "###);

    let (response, code) = index.search_get("offset=doggo").await;
    let (response, code) = index.search_get("?offset=doggo").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -99,7 +99,7 @@ async fn search_bad_limit() {
    }
    "###);

    let (response, code) = index.search_get("limit=doggo").await;
    let (response, code) = index.search_get("?limit=doggo").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -127,7 +127,7 @@ async fn search_bad_page() {
    }
    "###);

    let (response, code) = index.search_get("page=doggo").await;
    let (response, code) = index.search_get("?page=doggo").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -155,7 +155,7 @@ async fn search_bad_hits_per_page() {
    }
    "###);

    let (response, code) = index.search_get("hitsPerPage=doggo").await;
    let (response, code) = index.search_get("?hitsPerPage=doggo").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -201,7 +201,7 @@ async fn search_bad_crop_length() {
    }
    "###);

    let (response, code) = index.search_get("cropLength=doggo").await;
    let (response, code) = index.search_get("?cropLength=doggo").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -291,7 +291,7 @@ async fn search_bad_show_matches_position() {
    }
    "###);

    let (response, code) = index.search_get("showMatchesPosition=doggo").await;
    let (response, code) = index.search_get("?showMatchesPosition=doggo").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -321,40 +321,6 @@ async fn search_bad_facets() {
    // Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings.
}

#[actix_rt::test]
async fn search_bad_threshold() {
    let server = Server::new().await;
    let index = server.index("test");

    let (response, code) = index.search_post(json!({"rankingScoreThreshold": "doggo"})).await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
      "message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found a string: `\"doggo\"`",
      "code": "invalid_search_ranking_score_threshold",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
    }
    "###);
}

#[actix_rt::test]
async fn search_invalid_threshold() {
    let server = Server::new().await;
    let index = server.index("test");

    let (response, code) = index.search_post(json!({"rankingScoreThreshold": 42})).await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
      "message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
      "code": "invalid_search_ranking_score_threshold",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
    }
    "###);
}

#[actix_rt::test]
async fn search_non_filterable_facets() {
    let server = Server::new().await;
@@ -374,7 +340,7 @@ async fn search_non_filterable_facets() {
    }
    "###);

    let (response, code) = index.search_get("facets=doggo").await;
    let (response, code) = index.search_get("?facets=doggo").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -404,7 +370,7 @@ async fn search_non_filterable_facets_multiple_filterable() {
    }
    "###);

    let (response, code) = index.search_get("facets=doggo").await;
    let (response, code) = index.search_get("?facets=doggo").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -434,7 +400,7 @@ async fn search_non_filterable_facets_no_filterable() {
    }
    "###);

    let (response, code) = index.search_get("facets=doggo").await;
    let (response, code) = index.search_get("?facets=doggo").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -464,7 +430,7 @@ async fn search_non_filterable_facets_multiple_facets() {
    }
    "###);

    let (response, code) = index.search_get("facets=doggo,neko").await;
    let (response, code) = index.search_get("?facets=doggo,neko").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -557,7 +523,7 @@ async fn search_bad_matching_strategy() {
    }
    "###);

    let (response, code) = index.search_get("matchingStrategy=doggo").await;
    let (response, code) = index.search_get("?matchingStrategy=doggo").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
@@ -48,31 +48,6 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
])
});

static SCORE_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
},
{
"title": "Batman Returns",
"id": "C",
},
{
"title": "Batman",
"id": "D",
},
{
"title": "Badman",
"id": "E",
}
])
});

static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
@@ -985,213 +960,6 @@ async fn test_score_details() {
.await;
}

#[actix_rt::test]
async fn test_score() {
let server = Server::new().await;
let index = server.index("test");

let documents = SCORE_DOCUMENTS.clone();

let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;

index
.search(
json!({
"q": "Badman the dark knight returns 1",
"showRankingScore": true,
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.9746605609456898
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.8055252965383685
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.16666666666666666
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.07702020202020202
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.07702020202020202
}
]
"###);
},
)
.await;
}

#[actix_rt::test]
async fn test_score_threshold() {
let query = "Badman dark returns 1";
let server = Server::new().await;
let index = server.index("test");

let documents = SCORE_DOCUMENTS.clone();

let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;

index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"5");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.11553030303030302
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.11553030303030302
}
]
"###);
},
)
.await;

index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.2
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"3"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
}
]
"###);
},
)
.await;

index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.5
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"2"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
}
]
"###);
},
)
.await;

index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.8
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"1"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
}
]
"###);
},
)
.await;

index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 1.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"0"###);
// nobody is perfect
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
},
)
.await;
}

#[actix_rt::test]
async fn test_degraded_score_details() {
let server = Server::new().await;

@@ -87,68 +87,6 @@ async fn similar_bad_id() {
"###);
}

#[actix_rt::test]
async fn similar_bad_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;

let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;

let (response, code) = index.similar_post(json!({"rankingScoreThreshold": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found an array: `[\"doggo\"]`",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}

#[actix_rt::test]
async fn similar_invalid_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;

let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;

let (response, code) = index.similar_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}

#[actix_rt::test]
async fn similar_invalid_id() {
let server = Server::new().await;
@@ -241,7 +179,7 @@ async fn similar_bad_offset() {
}
"###);

let (response, code) = index.similar_get("id=287947&offset=doggo").await;
let (response, code) = index.similar_get("?id=287947&offset=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -283,7 +221,7 @@ async fn similar_bad_limit() {
}
"###);

let (response, code) = index.similar_get("id=287946&limit=doggo").await;
let (response, code) = index.similar_get("?id=287946&limit=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{

@@ -194,235 +194,6 @@ async fn basic() {
.await;
}

#[actix_rt::test]
async fn ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);

let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;

let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;

index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": [
0.7,
0.7,
-0.4
]
},
"_rankingScore": 0.2819308042526245
},
{
"title": "Shazam!",
"release_year": 2019,
"id": "287947",
"_vectors": {
"manual": [
0.8,
0.4,
-0.5
]
},
"_rankingScore": 0.1662663221359253
}
]
"###);
},
)
.await;

index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": [
0.7,
0.7,
-0.4
]
},
"_rankingScore": 0.2819308042526245
}
]
"###);
},
)
.await;

index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
}
]
"###);
},
)
.await;

index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
}
]
"###);
},
)
.await;

index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @"[]");
},
)
.await;
}

#[actix_rt::test]
async fn filter() {
let server = Server::new().await;

@@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.8.11", default-features = false }
charabia = { version = "0.8.10", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11"
deserr = "0.6.1"

@@ -66,7 +66,6 @@ fn main() -> Result<(), Box<dyn Error>> {
&mut DefaultSearchLogger,
logger,
TimeBudget::max(),
None,
)?;
if let Some((logger, dir)) = detailed_logger {
logger.finish(&mut ctx, Path::new(dir))?;

@@ -47,12 +47,6 @@ pub struct FacetGroupValue {
pub bitmap: RoaringBitmap,
}

#[derive(Debug)]
pub struct FacetGroupLazyValue<'b> {
pub size: u8,
pub bitmap_bytes: &'b [u8],
}

pub struct FacetGroupKeyCodec<T> {
_phantom: PhantomData<T>,
}
@@ -75,7 +69,6 @@ where
Ok(Cow::Owned(v))
}
}

impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
where
T: BytesDecode<'a>,
@@ -91,7 +84,6 @@ where
}

pub struct FacetGroupValueCodec;

impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
type EItem = FacetGroupValue;

@@ -101,23 +93,11 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
Ok(Cow::Owned(v))
}
}

impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
type DItem = FacetGroupValue;

fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let size = bytes[0];
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
Ok(FacetGroupValue { size, bitmap })
}
}

pub struct FacetGroupLazyValueCodec;

impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec {
type DItem = FacetGroupLazyValue<'a>;

fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] })
}
}

@@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::io::{self, Cursor};
use std::io;
use std::mem::size_of;

use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
@@ -57,24 +57,6 @@ impl CboRoaringBitmapCodec {
}
}

pub fn intersection_with_serialized(
mut bytes: &[u8],
other: &RoaringBitmap,
) -> io::Result<RoaringBitmap> {
// See above `deserialize_from` method for implementation details.
if bytes.len() <= THRESHOLD * size_of::<u32>() {
let mut bitmap = RoaringBitmap::new();
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
if other.contains(integer) {
bitmap.insert(integer);
}
}
Ok(bitmap)
} else {
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
}
}

/// Merge serialized CboRoaringBitmaps in a buffer.
///
/// if the merged values length is under the threshold, values are directly

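The `intersection_with_serialized` helper removed above intersects a stored posting list with a bitmap without fully materializing the stored side. A minimal self-contained sketch of the same idea, assuming roaring >= 0.10.4 (which provides `intersection_with_serialized_unchecked`, the method the large-bitmap branch above delegates to); the small-value fast path is specific to the Cbo encoding and is not reproduced here:

use std::io::Cursor;

use roaring::RoaringBitmap;

fn main() -> std::io::Result<()> {
    // A "stored" posting list, serialized the way roaring would persist it.
    let stored: RoaringBitmap = (0..1_000).collect();
    let mut bytes = Vec::new();
    stored.serialize_into(&mut bytes)?;

    // Intersect against the raw bytes: only values already present in
    // `universe` are ever inserted into the result.
    let universe: RoaringBitmap = (500..600).collect();
    let hits = universe.intersection_with_serialized_unchecked(Cursor::new(&bytes[..]))?;
    assert_eq!(hits.len(), 100);
    Ok(())
}
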
@@ -38,7 +38,7 @@ where
field_id,
)?;

if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
Ok(())
} else {
@@ -81,7 +81,7 @@ where
field_id,
)?;

if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
// We first fill the heap with values from the highest level
let starting_key =
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };

@@ -4,11 +4,9 @@ use heed::BytesEncode;
use roaring::RoaringBitmap;

use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::BytesRefCodec;
use crate::{CboRoaringBitmapCodec, Result};
use crate::Result;

/// Find all the document ids for which the given field contains a value contained within
/// the two bounds.
@@ -18,7 +16,6 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
field_id: u16,
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
universe: Option<&RoaringBitmap>,
docids: &mut RoaringBitmap,
) -> Result<()>
where
@@ -49,15 +46,13 @@ where
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids };
let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;

if let Some(starting_left_bound) =
get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?
{
if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let rightmost_bound =
Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
let group_size = usize::MAX;
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
Ok(())
@@ -69,16 +64,12 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>,
/// The subset of documents ids that are useful for this search.
/// Great performance optimizations can be achieved by only fetching values matching this subset.
universe: Option<&'bitmap RoaringBitmap>,
docids: &'bitmap mut RoaringBitmap,
}

impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
let left_key =
@@ -113,13 +104,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
}

if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
};
*self.docids |= value.bitmap;
}
}
Ok(())
@@ -210,13 +195,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
left_condition && right_condition
};
if should_take_whole_group {
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
*self.docids |= &previous_value.bitmap;
previous_key = next_key;
previous_value = next_value;
continue;
@@ -312,13 +291,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
left_condition && right_condition
};
if should_take_whole_group {
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
*self.docids |= &previous_value.bitmap;
} else {
let level = level - 1;
let starting_left_bound = previous_key.left_bound;
@@ -392,7 +365,6 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -412,7 +384,6 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -447,7 +418,6 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -469,7 +439,6 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -505,7 +474,6 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -531,7 +499,6 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -570,7 +537,6 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -590,7 +556,6 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -606,7 +571,6 @@ mod tests {
0,
&Bound::Unbounded,
&Bound::Unbounded,
None,
&mut docids,
)
.unwrap();
@@ -622,7 +586,6 @@ mod tests {
1,
&Bound::Unbounded,
&Bound::Unbounded,
None,
&mut docids,
)
.unwrap();
@@ -658,7 +621,6 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -672,7 +634,6 @@ mod tests {
1,
&start,
&end,
None,
&mut docids,
)
.unwrap();

@@ -36,7 +36,7 @@ pub fn ascending_facet_sort<'t>(
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);


@@ -19,9 +19,9 @@ pub fn descending_facet_sort<'t>(
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap();
let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(itertools::Either::Left(DescendingFacetSort {

@@ -4,7 +4,7 @@ use std::ops::Bound::{self, Excluded, Included};

use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
use roaring::{MultiOps, RoaringBitmap};
use roaring::RoaringBitmap;
use serde_json::Value;

use super::facet_range_search;
@@ -224,14 +224,14 @@ impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let filterable_fields = index.filterable_fields(rtxn)?;
self.inner_evaluate(rtxn, index, &filterable_fields, None)

self.inner_evaluate(rtxn, index, &filterable_fields)
}

fn evaluate_operator(
rtxn: &heed::RoTxn,
index: &Index,
field_id: FieldId,
universe: Option<&RoaringBitmap>,
operator: &Condition<'a>,
) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
@@ -291,22 +291,14 @@ impl<'a> Filter<'a> {
}
Condition::NotEqual(val) => {
let operator = Condition::Equal(val.clone());
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?;
let all_ids = index.documents_ids(rtxn)?;
return Ok(all_ids - docids);
}
};

let mut output = RoaringBitmap::new();
Self::explore_facet_number_levels(
rtxn,
numbers_db,
field_id,
left,
right,
universe,
&mut output,
)?;
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?;
Ok(output)
}

@@ -318,7 +310,6 @@ impl<'a> Filter<'a> {
field_id: FieldId,
left: Bound<f64>,
right: Bound<f64>,
universe: Option<&RoaringBitmap>,
output: &mut RoaringBitmap,
) -> Result<()> {
match (left, right) {
@@ -330,7 +321,7 @@ impl<'a> Filter<'a> {
(_, _) => (),
}
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
rtxn, db, field_id, &left, &right, universe, output,
rtxn, db, field_id, &left, &right, output,
)?;

Ok(())
@@ -341,37 +332,31 @@ impl<'a> Filter<'a> {
rtxn: &heed::RoTxn,
index: &Index,
filterable_fields: &HashSet<String>,
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
if universe.map_or(false, |u| u.is_empty()) {
return Ok(RoaringBitmap::new());
}

match &self.condition {
FilterCondition::Not(f) => {
let all_ids = index.documents_ids(rtxn)?;
let selected = Self::inner_evaluate(
&(f.as_ref().clone()).into(),
rtxn,
index,
filterable_fields,
universe,
)?;
match universe {
Some(universe) => Ok(universe - selected),
None => {
let all_ids = index.documents_ids(rtxn)?;
Ok(all_ids - selected)
}
}
Ok(all_ids - selected)
}
FilterCondition::In { fid, els } => {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;

if let Some(fid) = field_ids_map.id(fid.value()) {
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
.union()
let mut bitmap = RoaringBitmap::new();

for el in els {
let op = Condition::Equal(el.clone());
let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?;
bitmap |= el_bitmap;
}
Ok(bitmap)
} else {
Ok(RoaringBitmap::new())
}
@@ -386,7 +371,7 @@ impl<'a> Filter<'a> {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
Self::evaluate_operator(rtxn, index, fid, universe, op)
Self::evaluate_operator(rtxn, index, fid, op)
} else {
Ok(RoaringBitmap::new())
}
@@ -397,11 +382,14 @@ impl<'a> Filter<'a> {
}))?
}
}
FilterCondition::Or(subfilters) => subfilters
.iter()
.cloned()
.map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe))
.union(),
FilterCondition::Or(subfilters) => {
let mut bitmap = RoaringBitmap::new();
for f in subfilters {
bitmap |=
Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
}
Ok(bitmap)
}
FilterCondition::And(subfilters) => {
let mut subfilters_iter = subfilters.iter();
if let Some(first_subfilter) = subfilters_iter.next() {
@@ -410,21 +398,16 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
for f in subfilters_iter {
if bitmap.is_empty() {
return Ok(bitmap);
}
// TODO We are doing the intersections two times,
// it could be more efficient
// Can't I just replace this `&=` by an `=`?
bitmap &= Self::inner_evaluate(
&(f.clone()).into(),
rtxn,
index,
filterable_fields,
Some(&bitmap),
)?;
}
Ok(bitmap)
@@ -524,7 +507,6 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;

let geo_lng_token = Token::new(
@@ -557,7 +539,6 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;

let condition_right = FilterCondition::Condition {
@@ -571,7 +552,6 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;

left | right
@@ -587,7 +567,6 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?
};


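The `FilterCondition::And` hunk above evaluates the first clause against the whole index, then threads the shrinking result into every following clause as its universe, short-circuiting once nothing can match. A hedged sketch of that strategy in isolation (the `eval` closure stands in for `inner_evaluate`, whose real signature is not reproduced here):

use roaring::RoaringBitmap;

fn and_all<C>(
    clauses: &[C],
    eval: impl Fn(&C, Option<&RoaringBitmap>) -> RoaringBitmap,
) -> RoaringBitmap {
    let mut clauses = clauses.iter();
    // The first clause sees no universe; it has to scan the whole index.
    let Some(first) = clauses.next() else { return RoaringBitmap::new() };
    let mut acc = eval(first, None);
    for clause in clauses {
        if acc.is_empty() {
            return acc; // short-circuit: the conjunction is already empty
        }
        // Every later clause only has to consider documents still in `acc`.
        acc &= eval(clause, Some(&acc));
    }
    acc
}
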
@@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter};
pub use self::search::{FacetValueHit, SearchForFacetValues};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result};

@@ -54,9 +54,9 @@ pub fn facet_max_value<'t>(
}

/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@@ -78,9 +78,9 @@ where
}

/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@@ -102,9 +102,9 @@ where
}

/// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t, DC>(
pub(crate) fn get_highest_level<'t>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes();

@@ -169,7 +169,6 @@ impl<'a> Search<'a> {
index: self.index,
semantic: self.semantic.clone(),
time_budget: self.time_budget.clone(),
ranking_score_threshold: self.ranking_score_threshold,
};

let semantic = search.semantic.take();

@@ -50,7 +50,6 @@ pub struct Search<'a> {
index: &'a Index,
semantic: Option<SemanticSearch>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
}

impl<'a> Search<'a> {
@@ -71,7 +70,6 @@ impl<'a> Search<'a> {
index,
semantic: None,
time_budget: TimeBudget::max(),
ranking_score_threshold: None,
}
}

@@ -148,11 +146,6 @@ impl<'a> Search<'a> {
self
}

pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Search<'a> {
self.ranking_score_threshold = Some(ranking_score_threshold);
self
}

pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search {
let ctx = SearchContext::new(self.index, self.rtxn)?;
@@ -191,7 +184,6 @@ impl<'a> Search<'a> {
embedder_name,
embedder,
self.time_budget.clone(),
self.ranking_score_threshold,
)?
}
_ => execute_search(
@@ -209,7 +201,6 @@ impl<'a> Search<'a> {
&mut DefaultSearchLogger,
&mut DefaultSearchLogger,
self.time_budget.clone(),
self.ranking_score_threshold,
)?,
};

@@ -248,7 +239,6 @@ impl fmt::Debug for Search<'_> {
index: _,
semantic,
time_budget,
ranking_score_threshold,
} = self;
f.debug_struct("Search")
.field("query", query)
@@ -267,7 +257,6 @@ impl fmt::Debug for Search<'_> {
&semantic.as_ref().map(|semantic| &semantic.embedder_name),
)
.field("time_budget", time_budget)
.field("ranking_score_threshold", ranking_score_threshold)
.finish()
}
}

@@ -28,7 +28,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
scoring_strategy: ScoringStrategy,
logger: &mut dyn SearchLogger<Q>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<BucketSortOutput> {
logger.initial_query(query);
logger.ranking_rules(&ranking_rules);
@@ -165,19 +164,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
loop {
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
ranking_rule_scores.push(ScoreDetails::Skipped);

// remove candidates from the universe without adding them to result if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}

maybe_add_to_results!(bucket);

ranking_rule_scores.pop();

if cur_ranking_rule_index == 0 {
@@ -233,18 +220,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
debug_assert!(
ranking_rule_universes[cur_ranking_rule_index].is_superset(&next_bucket.candidates)
);

// remove candidates from the universe without adding them to result if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -=
next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}

ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;

if cur_ranking_rule_index == ranking_rules_len - 1

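Both `bucket_sort` hunks above apply the same pruning rule. A minimal sketch of it detached from milli's types (names here are illustrative, and it assumes, as the code above does, that accumulating further ranking-rule scores cannot raise the global score back above the threshold):

use roaring::RoaringBitmap;

/// Returns true when the caller should backtrack to the previous ranking rule.
fn prune_below_threshold(
    all_candidates: &mut RoaringBitmap,
    bucket: &RoaringBitmap,
    rest_of_universe: &RoaringBitmap,
    current_score: f64,
    ranking_score_threshold: Option<f64>,
) -> bool {
    match ranking_score_threshold {
        Some(threshold) if current_score < threshold => {
            // Neither the bucket nor whatever is left in this rule's universe
            // can reach the threshold anymore, so drop both from the candidates.
            *all_candidates -= bucket | rest_of_universe;
            true
        }
        _ => false,
    }
}
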
@@ -523,7 +523,6 @@ mod tests {
&mut crate::DefaultSearchLogger,
&mut crate::DefaultSearchLogger,
TimeBudget::max(),
None,
)
.unwrap();


@@ -548,7 +548,6 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
Ok(())
}

#[tracing::instrument(level = "trace", skip_all, target = "search")]
pub fn filtered_universe(
index: &Index,
txn: &RoTxn<'_>,
@@ -574,7 +573,6 @@ pub fn execute_vector_search(
embedder_name: &str,
embedder: &Embedder,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;

@@ -604,7 +602,6 @@ pub fn execute_vector_search(
scoring_strategy,
placeholder_search_logger,
time_budget,
ranking_score_threshold,
)?;

Ok(PartialSearchResult {
@@ -634,7 +631,6 @@ pub fn execute_search(
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;

@@ -723,7 +719,6 @@ pub fn execute_search(
scoring_strategy,
query_graph_logger,
time_budget,
ranking_score_threshold,
)?
} else {
let ranking_rules =
@@ -738,7 +733,6 @@ pub fn execute_search(
scoring_strategy,
placeholder_search_logger,
time_budget,
ranking_score_threshold,
)?
};


@@ -17,7 +17,6 @@ pub struct Similar<'a> {
index: &'a Index,
embedder_name: String,
embedder: Arc<Embedder>,
ranking_score_threshold: Option<f64>,
}

impl<'a> Similar<'a> {
@@ -30,17 +29,7 @@ impl<'a> Similar<'a> {
embedder_name: String,
embedder: Arc<Embedder>,
) -> Self {
Self {
id,
filter: None,
offset,
limit,
rtxn,
index,
embedder_name,
embedder,
ranking_score_threshold: None,
}
Self { id, filter: None, offset, limit, rtxn, index, embedder_name, embedder }
}

pub fn filter(&mut self, filter: Filter<'a>) -> &mut Self {
@@ -48,18 +37,8 @@ impl<'a> Similar<'a> {
self
}

pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Self {
self.ranking_score_threshold = Some(ranking_score_threshold);
self
}

pub fn execute(&self) -> Result<SearchResult> {
let mut universe = filtered_universe(self.index, self.rtxn, &self.filter)?;

// we never want to receive the docid
universe.remove(self.id);

let universe = universe;
let universe = filtered_universe(self.index, self.rtxn, &self.filter)?;

let embedder_index =
self.index
@@ -98,8 +77,6 @@ impl<'a> Similar<'a> {
let mut documents_seen = RoaringBitmap::new();
documents_seen.insert(self.id);

let mut candidates = universe;

for (docid, distance) in results
.into_iter()
// skip documents we've already seen & mark that we saw the current document
@@ -108,6 +85,8 @@ impl<'a> Similar<'a> {
// take **after** filter and skip so that we get exactly limit elements if available
.take(self.limit)
{
documents_ids.push(docid);

let score = 1.0 - distance;
let score = self
.embedder
@@ -115,28 +94,14 @@ impl<'a> Similar<'a> {
.map(|distribution| distribution.shift(score))
.unwrap_or(score);

let score_details =
vec![ScoreDetails::Vector(score_details::Vector { similarity: Some(score) })];
let score = ScoreDetails::Vector(score_details::Vector { similarity: Some(score) });

let score = ScoreDetails::global_score(score_details.iter());

if let Some(ranking_score_threshold) = &self.ranking_score_threshold {
if score < *ranking_score_threshold {
// this document is no longer a candidate
candidates.remove(docid);
// any document after this one is no longer a candidate either, so restrict the set to documents already seen.
candidates &= documents_seen;
break;
}
}

documents_ids.push(docid);
document_scores.push(score_details);
document_scores.push(vec![score]);
}

Ok(SearchResult {
matching_words: Default::default(),
candidates,
candidates: universe,
documents_ids,
document_scores,
degraded: false,

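In the `Similar::execute` hunks above, results come back sorted best-first, so the first score under the threshold also disqualifies every later one: the loop breaks and the candidates set is restricted to the documents already inspected. A small standalone sketch of that cut:

/// `scored` is assumed sorted by decreasing score, as in `Similar::execute`.
fn cut_at_threshold(scored: Vec<(u32, f64)>, threshold: Option<f64>) -> Vec<(u32, f64)> {
    let mut kept = Vec::new();
    for (docid, score) in scored {
        if threshold.map_or(false, |t| score < t) {
            break; // scores only decrease from here on, stop scanning
        }
        kept.push((docid, score));
    }
    kept
}
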
@@ -8,7 +8,6 @@ mod extract_vector_points;
mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod extract_word_position_docids;
// mod searchable;

use std::fs::File;
use std::io::BufReader;

@@ -1,211 +0,0 @@
use std::collections::HashMap;

use charabia::normalizer::NormalizedTokenIter;
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
use roaring::RoaringBitmap;
use serde_json::Value;

use crate::update::settings::InnerIndexSettings;
use crate::{InternalError, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};

pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>;

pub struct FieldWordPositionExtractorBuilder<'a> {
max_positions_per_attributes: u16,
stop_words: Option<&'a fst::Set<Vec<u8>>>,
separators: Option<Vec<&'a str>>,
dictionary: Option<Vec<&'a str>>,
}

impl<'a> FieldWordPositionExtractorBuilder<'a> {
pub fn new(
max_positions_per_attributes: Option<u32>,
settings: &'a InnerIndexSettings,
) -> Result<Self> {
let stop_words = settings.stop_words.as_ref();
let separators: Option<Vec<_>> =
settings.allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary: Option<Vec<_>> =
settings.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
Ok(Self {
max_positions_per_attributes: max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE as u16, |max| {
max.min(MAX_POSITION_PER_ATTRIBUTE) as u16
}),
stop_words,
separators,
dictionary,
})
}

pub fn build(&'a self) -> FieldWordPositionExtractor<'a> {
let builder = tokenizer_builder(
self.stop_words,
self.separators.as_deref(),
self.dictionary.as_deref(),
None,
);

FieldWordPositionExtractor {
tokenizer: builder.into_tokenizer(),
max_positions_per_attributes: self.max_positions_per_attributes,
}
}
}

pub struct FieldWordPositionExtractor<'a> {
tokenizer: Tokenizer<'a>,
max_positions_per_attributes: u16,
}

impl<'a> FieldWordPositionExtractor<'a> {
pub fn extract<'b>(
&'a self,
field_bytes: &[u8],
buffer: &'b mut String,
) -> Result<ExtractedFieldWordPosition<'a, 'b>> {
let field_value = serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
Ok(ExtractedFieldWordPosition {
tokenizer: &self.tokenizer,
max_positions_per_attributes: self.max_positions_per_attributes,
field_value,
buffer: buffer,
})
}
}

pub struct ExtractedFieldWordPosition<'a, 'b> {
tokenizer: &'a Tokenizer<'a>,
max_positions_per_attributes: u16,
field_value: Value,
buffer: &'b mut String,
}

impl<'a> ExtractedFieldWordPosition<'a, '_> {
pub fn iter<'o>(&'o mut self) -> FieldWordPositionIter<'o> {
self.buffer.clear();
let inner = match json_to_string(&self.field_value, &mut self.buffer) {
Some(field) => Some(self.tokenizer.tokenize(field)),
None => None,
};

// create an iterator of token with their positions.
FieldWordPositionIter {
inner,
max_positions_per_attributes: self.max_positions_per_attributes,
position: 0,
prev_kind: None,
}
}
}

pub struct FieldWordPositionIter<'a> {
inner: Option<NormalizedTokenIter<'a, 'a>>,
max_positions_per_attributes: u16,
position: u16,
prev_kind: Option<TokenKind>,
}

impl<'a> Iterator for FieldWordPositionIter<'a> {
type Item = (u16, Token<'a>);

fn next(&mut self) -> Option<Self::Item> {
if self.position >= self.max_positions_per_attributes {
return None;
}

let token = self.inner.as_mut().map(|i| i.next()).flatten()?;

match token.kind {
TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
self.position += match self.prev_kind {
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
Some(_) => 1,
None => 0,
};
self.prev_kind = Some(token.kind)
}
TokenKind::Separator(_) if self.position == 0 => {
return self.next();
}
TokenKind::Separator(SeparatorKind::Hard) => {
self.prev_kind = Some(token.kind);
}
TokenKind::Separator(SeparatorKind::Soft)
if self.prev_kind != Some(TokenKind::Separator(SeparatorKind::Hard)) =>
{
self.prev_kind = Some(token.kind);
}
_ => return self.next(),
}

if !token.is_word() {
return self.next();
}

// keep a word only if it is not empty and fit in a LMDB key.
let lemma = token.lemma().trim();
if !lemma.is_empty() && lemma.len() <= MAX_WORD_LENGTH {
Some((self.position, token))
} else {
self.next()
}
}
}

/// Factorize tokenizer building.
pub fn tokenizer_builder<'a>(
stop_words: Option<&'a fst::Set<Vec<u8>>>,
allowed_separators: Option<&'a [&str]>,
dictionary: Option<&'a [&str]>,
script_language: Option<&'a HashMap<Script, Vec<Language>>>,
) -> TokenizerBuilder<'a, Vec<u8>> {
let mut tokenizer_builder = TokenizerBuilder::new();
if let Some(stop_words) = stop_words {
tokenizer_builder.stop_words(stop_words);
}
if let Some(dictionary) = dictionary {
tokenizer_builder.words_dict(dictionary);
}
if let Some(separators) = allowed_separators {
tokenizer_builder.separators(separators);
}

if let Some(script_language) = script_language {
tokenizer_builder.allow_list(script_language);
}

tokenizer_builder
}

/// Transform a JSON value into a string that can be indexed.
fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a str> {
fn inner(value: &Value, output: &mut String) -> bool {
use std::fmt::Write;
match value {
Value::Null | Value::Object(_) => false,
Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(),
Value::Number(number) => write!(output, "{}", number).is_ok(),
Value::String(string) => write!(output, "{}", string).is_ok(),
Value::Array(array) => {
let mut count = 0;
for value in array {
if inner(value, output) {
output.push_str(". ");
count += 1;
}
}
// check that at least one value was written
count != 0
}
}
}

if let Value::String(string) = value {
Some(string)
} else if inner(value, buffer) {
Some(buffer)
} else {
None
}
}
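One detail worth noting in the deleted extractor above is the position bump in `FieldWordPositionIter::next`: a word that follows a hard separator jumps eight positions, while a word after any other token advances by one, so proximity ranking treats sentence boundaries as much larger gaps than spaces. The rule on its own, as a self-contained sketch:

/// Mirrors the increments used above: 8 after a hard separator (".", "!", ...),
/// 1 after any other previous token, 0 for the very first word of the field.
enum Prev {
    HardSeparator,
    Other,
}

fn bump_position(position: u16, prev: Option<Prev>) -> u16 {
    let increment = match prev {
        Some(Prev::HardSeparator) => 8, // crossing a sentence boundary
        Some(Prev::Other) => 1,         // adjacent word in the same sentence
        None => 0,                      // first word of the attribute
    };
    position.saturating_add(increment)
}
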
@@ -1,114 +0,0 @@
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryInto;
use std::fs::File;
use std::io;
use std::io::BufReader;

use field_word_position::FieldWordPositionExtractorBuilder;
use obkv::KvReader;
use roaring::RoaringBitmap;
use word_docids::{WordDocidsDump, WordDocidsExtractor};

use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::extract::extract_docid_word_positions::ScriptLanguageDocidsMap;
use crate::update::index_documents::GrenadParameters;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{FieldId, Result, SerializationError};

mod field_word_position;
mod word_docids;

#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_searchable_data<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
max_positions_per_attributes: Option<u32>,
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
let searchable_fields_to_index = settings_diff.searchable_fields_to_index();

let mut documents_ids = RoaringBitmap::new();

let add_builder =
FieldWordPositionExtractorBuilder::new(max_positions_per_attributes, &settings_diff.new)?;
let add_token_positions_extractor = add_builder.build();
let del_builder;
let del_token_positions_extractor = if settings_diff.settings_update_only {
del_builder = FieldWordPositionExtractorBuilder::new(
max_positions_per_attributes,
&settings_diff.old,
)?;
del_builder.build()
} else {
add_builder.build()
};
let token_positions_extractor = &[del_token_positions_extractor, add_token_positions_extractor];

let mut word_map = BTreeMap::new();
let mut word_docids_extractor = WordDocidsExtractor::new(settings_diff);

let mut cursor = obkv_documents.into_cursor()?;
// loop over documents
while let Some((key, value)) = cursor.move_on_next()? {
let document_id = key
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value);
// if the searchable fields didn't change, skip the searchable indexing for this document.
if !settings_diff.reindex_searchable()
&& !searchable_fields_changed(&obkv, &searchable_fields_to_index)
{
continue;
}

documents_ids.push(document_id);

let mut buffer = String::new();
for field_id in searchable_fields_to_index.iter() {
let Some(field_obkv) = obkv.get(*field_id).map(KvReaderDelAdd::new) else { continue };

for (deladd, field_bytes) in field_obkv {
let mut extracted_positions =
token_positions_extractor[deladd as usize].extract(field_bytes, &mut buffer)?;
for (position, token) in extracted_positions.iter() {
let word = token.lemma().trim();
if !word_map.contains_key(word) {
word_map.insert(word.to_string(), word_map.len() as u32);
}
let word_id = word_map.get(word).unwrap();
word_docids_extractor.insert(*word_id, *field_id, document_id, deladd);
}
}
}

if word_docids_extractor.rough_size_estimate()
> indexer.max_memory.map_or(512 * 1024 * 1024, |s| s.min(512 * 1024 * 1024))
{
let WordDocidsDump { .. } =
word_docids_extractor.dump(&word_map, &searchable_fields_to_index, indexer)?;
}
}

todo!()
}

/// Check if any searchable fields of a document changed.
fn searchable_fields_changed(
obkv: &KvReader<FieldId>,
searchable_fields: &BTreeSet<FieldId>,
) -> bool {
for field_id in searchable_fields {
let Some(field_obkv) = obkv.get(*field_id).map(KvReaderDelAdd::new) else { continue };
match (field_obkv.get(DelAdd::Deletion), field_obkv.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
(None, None) => (),
// if both contains a value and values are the same, check the next field.
(Some(del), Some(add)) if del == add => (),
// otherwise the fields are different, return true.
_otherwise => return true,
}
}

false
}
@@ -1,203 +0,0 @@
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::hash::Hash;
use std::io::BufReader;
use std::mem::size_of;

use roaring::RoaringBitmap;

use crate::update::del_add::KvWriterDelAdd;
use crate::update::index_documents::extract::searchable::DelAdd;
use crate::update::index_documents::{create_writer, writer_into_reader, GrenadParameters};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result};

pub struct WordDocidsExtractor<'a> {
    word_fid_docids: RevertedIndex<(u32, FieldId)>,
    settings_diff: &'a InnerIndexSettingsDiff,
}

impl<'a> WordDocidsExtractor<'a> {
    pub fn new(settings_diff: &'a InnerIndexSettingsDiff) -> Self {
        Self { word_fid_docids: RevertedIndex::new(), settings_diff }
    }

    pub fn insert(&mut self, wordid: u32, fieldid: FieldId, docid: DocumentId, del_add: DelAdd) {
        self.word_fid_docids.insert((wordid, fieldid), docid, del_add);
    }

    pub fn rough_size_estimate(&self) -> usize {
        self.word_fid_docids.rough_size_estimate()
    }

    pub fn dump(
        &mut self,
        word_map: &BTreeMap<String, u32>,
        fields: &BTreeSet<FieldId>,
        indexer: GrenadParameters,
    ) -> Result<WordDocidsDump> {
        let mut word_fid_docids_writer = create_writer(
            indexer.chunk_compression_type,
            indexer.chunk_compression_level,
            tempfile::tempfile()?,
        );

        let mut word_docids_writer = create_writer(
            indexer.chunk_compression_type,
            indexer.chunk_compression_level,
            tempfile::tempfile()?,
        );

        let mut exact_word_docids_writer = create_writer(
            indexer.chunk_compression_type,
            indexer.chunk_compression_level,
            tempfile::tempfile()?,
        );

        let mut exact_word_deletion = RoaringBitmap::new();
        let mut exact_word_addition = RoaringBitmap::new();
        let mut word_deletion = RoaringBitmap::new();
        let mut word_addition = RoaringBitmap::new();
        let mut key_buffer = Vec::new();
        let mut bitmap_buffer = Vec::new();
        let mut obkv_buffer = Vec::new();
        for (word, wid) in word_map {
            exact_word_deletion.clear();
            exact_word_addition.clear();
            word_deletion.clear();
            word_addition.clear();
            for fid in fields {
                if let Some((deletion, addition)) = self.word_fid_docids.inner.get(&(*wid, *fid)) {
                    if self.settings_diff.old.exact_attributes.contains(fid) {
                        exact_word_deletion |= deletion;
                    } else {
                        word_deletion |= deletion;
                    }

                    if self.settings_diff.new.exact_attributes.contains(fid) {
                        exact_word_addition |= addition;
                    } else {
                        word_addition |= addition;
                    }

                    if deletion != addition {
                        key_buffer.clear();
                        key_buffer.extend_from_slice(word.as_bytes());
                        key_buffer.push(0);
                        key_buffer.extend_from_slice(&fid.to_be_bytes());
                        let value = bitmaps_into_deladd_obkv(
                            deletion,
                            addition,
                            &mut obkv_buffer,
                            &mut bitmap_buffer,
                        )?;
                        word_fid_docids_writer.insert(&key_buffer, value)?;
                    }
                }
            }

            key_buffer.clear();
            key_buffer.extend_from_slice(word.as_bytes());
            if exact_word_deletion != exact_word_addition {
                let value = bitmaps_into_deladd_obkv(
                    &exact_word_deletion,
                    &exact_word_addition,
                    &mut obkv_buffer,
                    &mut bitmap_buffer,
                )?;
                exact_word_docids_writer.insert(&key_buffer, value)?;
            }

            if word_deletion != word_addition {
                let value = bitmaps_into_deladd_obkv(
                    &word_deletion,
                    &word_addition,
                    &mut obkv_buffer,
                    &mut bitmap_buffer,
                )?;
                word_docids_writer.insert(&key_buffer, value)?;
            }
        }

        self.word_fid_docids.clear();

        Ok(WordDocidsDump {
            word_fid_docids: writer_into_reader(word_fid_docids_writer)?,
            word_docids: writer_into_reader(word_docids_writer)?,
            exact_word_docids: writer_into_reader(exact_word_docids_writer)?,
        })
    }
}
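The word_fid_docids keys built in dump above are the word bytes, a 0 separator, then the field id in big-endian. A standalone sketch of that layout follows; word_fid_key is a hypothetical helper, assuming FieldId is a u16 as in milli.

// Hypothetical helper showing the key layout used by word_fid_docids:
// big-endian field ids make keys sort by word first, then by field id.
fn word_fid_key(word: &str, fid: u16) -> Vec<u8> {
    let mut key = Vec::with_capacity(word.len() + 1 + 2);
    key.extend_from_slice(word.as_bytes());
    key.push(0);
    key.extend_from_slice(&fid.to_be_bytes());
    key
}

fn main() {
    assert_eq!(word_fid_key("cat", 1), b"cat\0\x00\x01");
}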
fn bitmaps_into_deladd_obkv<'a>(
    deletion: &RoaringBitmap,
    addition: &RoaringBitmap,
    obkv_buffer: &'a mut Vec<u8>,
    bitmap_buffer: &mut Vec<u8>,
) -> Result<&'a mut Vec<u8>> {
    obkv_buffer.clear();
    let mut value_writer = KvWriterDelAdd::new(obkv_buffer);
    if !deletion.is_empty() {
        bitmap_buffer.clear();
        CboRoaringBitmapCodec::serialize_into(deletion, bitmap_buffer);
        value_writer.insert(DelAdd::Deletion, &*bitmap_buffer)?;
    }
    if !addition.is_empty() {
        bitmap_buffer.clear();
        CboRoaringBitmapCodec::serialize_into(addition, bitmap_buffer);
        value_writer.insert(DelAdd::Addition, &*bitmap_buffer)?;
    }
    Ok(value_writer.into_inner()?)
}

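The clear-and-reuse buffer pattern in bitmaps_into_deladd_obkv can be demonstrated with roaring's public serializer. This sketch uses RoaringBitmap::serialize_into from the roaring crate, not milli's CBO codec, so the byte format differs; only the buffer-reuse idea is the same.

use roaring::RoaringBitmap;

fn main() -> std::io::Result<()> {
    let deletion: RoaringBitmap = (0..100).collect();
    let mut bitmap_buffer = Vec::new();

    // Clear instead of reallocating, so the buffer's capacity is reused
    // across the many words serialized in a dump.
    bitmap_buffer.clear();
    deletion.serialize_into(&mut bitmap_buffer)?;

    let roundtrip = RoaringBitmap::deserialize_from(&bitmap_buffer[..])?;
    assert_eq!(roundtrip, deletion);
    Ok(())
}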
#[derive(Debug)]
struct RevertedIndex<K> {
    inner: HashMap<K, (RoaringBitmap, RoaringBitmap)>,
    max_value_size: usize,
}

impl<K: PartialEq + Eq + Hash> RevertedIndex<K> {
    pub fn insert(&mut self, key: K, docid: DocumentId, del_add: DelAdd) {
        let size = match self.inner.entry(key) {
            Occupied(mut entry) => {
                let (ref mut del, ref mut add) = entry.get_mut();
                match del_add {
                    DelAdd::Deletion => del.insert(docid),
                    DelAdd::Addition => add.insert(docid),
                };
                del.serialized_size() + add.serialized_size()
            }
            Vacant(entry) => {
                let mut bitmap = RoaringBitmap::new();
                bitmap.insert(docid);
                let size = bitmap.serialized_size();
                match del_add {
                    DelAdd::Deletion => entry.insert((bitmap, RoaringBitmap::new())),
                    DelAdd::Addition => entry.insert((RoaringBitmap::new(), bitmap)),
                };
                size * 2
            }
        };

        self.max_value_size = self.max_value_size.max(size);
    }

    pub fn new() -> Self {
        Self { inner: HashMap::new(), max_value_size: 0 }
    }

    pub fn rough_size_estimate(&self) -> usize {
        self.inner.len() * size_of::<K>() + self.inner.len() * self.max_value_size
    }

    fn clear(&mut self) {
        self.max_value_size = 0;
        self.inner.clear();
    }
}

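Stripped of the size tracking, the core of RevertedIndex is a map from a key to a (deletions, additions) bitmap pair. A minimal standalone sketch, where TinyIndex is a hypothetical type that collapses the Occupied/Vacant handling into entry().or_default():

use std::collections::HashMap;
use roaring::RoaringBitmap;

// Hypothetical reduction of RevertedIndex: each key owns a
// (deletions, additions) pair of bitmaps.
#[derive(Default)]
struct TinyIndex {
    inner: HashMap<u32, (RoaringBitmap, RoaringBitmap)>,
}

impl TinyIndex {
    fn insert(&mut self, key: u32, docid: u32, is_deletion: bool) {
        // or_default creates the empty pair on first sight of a key,
        // playing the role of the Vacant branch above.
        let (del, add) = self.inner.entry(key).or_default();
        if is_deletion { del.insert(docid); } else { add.insert(docid); }
    }
}

fn main() {
    let mut index = TinyIndex::default();
    index.insert(7, 42, false); // docid 42 added under key 7
    index.insert(7, 41, true);  // docid 41 deleted under key 7
    let (del, add) = &index.inner[&7];
    assert!(del.contains(41) && add.contains(42));
}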
pub struct WordDocidsDump {
    pub word_fid_docids: grenad::Reader<BufReader<File>>,
    pub word_docids: grenad::Reader<BufReader<File>>,
    pub exact_word_docids: grenad::Reader<BufReader<File>>,
}
@@ -1162,18 +1162,6 @@ impl InnerIndexSettingsDiff {
        }
    }

    pub fn searchable_fields_to_index(&self) -> BTreeSet<FieldId> {
        if self.settings_update_only {
            self.new
                .fields_ids_map
                .ids()
                .filter(|id| self.reindex_searchable_id(*id).is_some())
                .collect()
        } else {
            self.new.searchable_fields_ids.iter().copied().collect()
        }
    }

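The branch above distinguishes a settings-only update (index just the fields whose rules changed) from a document update (index every searchable field). A standalone sketch of that decision, where fields_to_index and needs_reindex are hypothetical stand-ins for fields_ids_map.ids() and reindex_searchable_id:

use std::collections::BTreeSet;

// Hypothetical reduction of searchable_fields_to_index.
fn fields_to_index(
    settings_update_only: bool,
    all_field_ids: &[u16],
    searchable_field_ids: &[u16],
    needs_reindex: impl Fn(u16) -> bool,
) -> BTreeSet<u16> {
    if settings_update_only {
        // Settings update: only fields whose indexing rules changed.
        all_field_ids.iter().copied().filter(|&id| needs_reindex(id)).collect()
    } else {
        // Document update: every currently searchable field.
        searchable_field_ids.iter().copied().collect()
    }
}

fn main() {
    let all = [0u16, 1, 2, 3];
    let searchable = [0u16, 2];
    assert_eq!(fields_to_index(true, &all, &searchable, |id| id == 1), BTreeSet::from([1]));
    assert_eq!(fields_to_index(false, &all, &searchable, |id| id == 1), BTreeSet::from([0, 2]));
}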
    pub fn any_reindexing_needed(&self) -> bool {
        self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors()
    }