Compare commits

...

51 Commits

Author SHA1 Message Date
ManyTheFish
9874efc352 WIP 2024-07-04 11:18:45 +02:00
meili-bors[bot]
a838f39fce Merge #4682
4682: Speed Up Filter AND operations r=Kerollmops a=Kerollmops

This PR fixes #4659 and improves the way we perform AND operations by using the latest [RoaringBitmap feature to do intersections with serialized bitmaps](https://github.com/RoaringBitmap/roaring-rs/pull/281). Doing so drastically reduces the time spent reading and copying bytes in memory, because we only deserialize and keep the subset of the bitmap's containers we actually need.
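Conceptually, instead of deserializing a stored bitmap and then intersecting it with the current candidate set, the candidate set is intersected directly against the serialized bytes. A minimal sketch of the idea, reusing the `intersection_with_serialized_unchecked` call that appears in the codec diff further down (the wrapper function and its name are illustrative only, and it assumes the bytes use the standard roaring serialization):

```rust
use std::io::Cursor;

use roaring::RoaringBitmap;

/// Illustrative wrapper: intersect the in-memory `universe` with a bitmap
/// that is still in its serialized form, without fully deserializing it.
/// Assumes `serialized` is a standard roaring serialization; the Meilisearch
/// codec additionally special-cases very small bitmaps (see the diff below).
fn intersect_lazily(
    universe: &RoaringBitmap,
    serialized: &[u8],
) -> std::io::Result<RoaringBitmap> {
    universe.intersection_with_serialized_unchecked(Cursor::new(serialized))
}
```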

### Some Example Results

On a 45M-document dataset stored on a good NVMe drive, the example filter below took 77ms before this PR and only 13ms with it (a 6x speedup):

```sql
artist = 'The Beatles' AND (duration 150 TO 500 OR duration NOT EXISTS) AND genres IN [Rock, 'Rock and Roll'] AND rating > 4 AND released_year 1960 TO 1990
```

By reordering the filter AND clauses so that the most selective ones come first (which shrinks the candidate set early), we can reach a constant 8ms execution time; note, however, that this reordering is a manual operation. By contrast, the previous filter pipeline still runs at a constant 45ms with the reordered filter below, so the speedup holds at roughly 6x:

```sql
artist = 'The Beatles' AND genres IN [Rock, 'Rock and Roll'] AND released_year 1960 TO 1990 AND (duration 150 TO 500 OR duration NOT EXISTS)
```

### To Do
- [x] Rebase on `release-v1.9.0`.
- [ ] ~Skip branches of the facet/filter tree when nothing is in common with the universe~ (abandoned: it turned out to be slower this way).
- [x] When the universe is required, use the universe given as a parameter if possible.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-06-11 02:51:17 +00:00
meili-bors[bot]
7add7d053c Merge #4689
4689: Bring back changes from v1.8.2 into v1.9.0 r=curquiza a=dureuill



Co-authored-by: dureuill <dureuill@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
2024-06-10 14:03:55 +00:00
Louis Dureuil
7559dfc814 Merge tag 'v1.8.2' into release-v1.9.0 2024-06-10 15:07:34 +02:00
meili-bors[bot]
6c6c4732a1 Merge #4681
4681: Fix concurrency issue r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4654 

## What does this PR do?
- Asynchronously drop permits
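A minimal sketch of the change (the struct definition here is an assumption; the `Drop` body matches the diff to `Permit` further down). Blocking on an async channel send inside `Drop` via `futures::executor::block_on` can stall or deadlock a tokio worker thread, so the send is handed off to a spawned task instead:

```rust
use tokio::sync::mpsc;

/// Assumed shape of the permit type guarding concurrent searches.
pub struct Permit {
    sender: mpsc::Sender<()>,
}

impl Drop for Permit {
    fn drop(&mut self) {
        let sender = self.sender.clone();
        // Previously: `futures::executor::block_on(self.sender.send(()))`,
        // which blocks the current thread. Hand the send off to a task and
        // ignore the JoinHandle instead; if the channel is closed, the whole
        // instance is down anyway.
        std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
    }
}
```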


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-10 09:36:08 +00:00
meili-bors[bot]
3976fe660e Merge #4688
4688: Update version for the next release (v1.8.2) in Cargo.toml r=dureuill a=meili-bot

⚠️ This PR is automatically generated. Check that the new version is the expected one and that Cargo.lock has been updated before merging.

Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2024-06-10 08:28:34 +00:00
Louis Dureuil
50f8218a5d Asynchronously drop permits 2024-06-10 10:19:57 +02:00
dureuill
19585f1a4f Update version for the next release (v1.8.2) in Cargo.toml 2024-06-10 07:59:36 +00:00
Clément Renault
8ec6e175e5 Replace roaring patch to the v0.10.5 2024-06-07 22:11:26 -04:00
Clément Renault
75b2e02cd2 Log more stuff around filtering 2024-06-06 11:00:07 -04:00
Clément Renault
40f05fe156 Bump roaring to the latest commit 2024-06-06 10:59:55 -04:00
Clément Renault
52d0d35b39 Revert "Reduce the universe while exploring the facet tree" because it's slower this way
This reverts commit 14026115f21409535772ede0ee4273f37848dd61.
2024-06-06 09:17:51 -04:00
Clément Renault
5432776132 Reduce the universe while exploring the facet tree 2024-06-06 09:17:51 -04:00
Clément Renault
66470b27e6 Use the MultiOps trait for IN operations 2024-06-06 09:17:51 -04:00
Clément Renault
0a9bd398c7 Improve the NOT operator to use the universe when possible 2024-06-06 09:17:51 -04:00
Clément Renault
7967e93c16 Skip evaluating when a universe is empty, nothing can be found 2024-06-06 09:17:51 -04:00
Clément Renault
a6f3a01c6a Expose the universe to do efficient intersections on deserialization 2024-06-06 09:17:51 -04:00
Clément Renault
4ca4a3f954 Make the CboRoaringBitmapCodec support intersection on deserialization 2024-06-06 09:17:51 -04:00
Clément Renault
e4a69c5ac3 Introduce the FacetGroupLazyValue type 2024-06-06 09:17:50 -04:00
Clément Renault
ff2e498267 Patch roaring to use the version supporting intersection on deserialization 2024-06-06 09:17:50 -04:00
Clément Renault
531e3d7d6a MultiOps trait for OR operations 2024-06-06 09:17:50 -04:00
meili-bors[bot]
cb765ad249 Merge #4684
4684: Update Charabia v0.8.11 r=irevoire a=ManyTheFish

# Update Charabia v0.8.11

### Adds a new normalizer to normalize œ to oe and æ to ae
Search words containing `œ` or `æ` will now also be retrieved using `oe` or `ae`, and vice versa; for example, `Daemon` <=> `Dæmon`.
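A hedged sketch of the effect using Charabia's `Tokenize` trait (the exact lemma below is an assumption about the normalizer's output):

```rust
use charabia::Tokenize;

fn main() {
    // With the new normalizer, `æ` is folded to `ae` (and `œ` to `oe`),
    // so both spellings produce the same lemma and match at search time.
    let mut tokens = "Dæmon".tokenize();
    let token = tokens.next().unwrap();
    // Assumed normalized, lowercased lemma.
    assert_eq!(token.lemma(), "daemon");
}
```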

### Fix: make `chinese-normalization-pinyin` feature flag compile
Fixes #4629



Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-06-06 08:59:49 +00:00
ManyTheFish
2e50c6ec81 Update Charabia 2024-06-06 10:18:43 +02:00
meili-bors[bot]
40b2345394 Merge #4680
4680: Speedup additional searchables r=Kerollmops a=ManyTheFish

Fixes #4492.

## To Do
 - [x] Do not call the `InnerSettingsDiff::only_additional_fields` function too many times

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-06-05 15:39:28 +00:00
ManyTheFish
30293883e0 Fix condition mistake 2024-06-05 17:30:07 +02:00
ManyTheFish
b833be46b9 Avoid running proximity when only the exact attributes change 2024-06-05 17:30:07 +02:00
ManyTheFish
0a4118329e Put only_additional_fields to None if the difference gives an empty result. 2024-06-05 17:30:07 +02:00
ManyTheFish
261e92d7e6 Skip iterating over documents when the faceted field list doesn't change 2024-06-05 17:30:07 +02:00
ManyTheFish
5cd08979b1 iterate over the faceted fields instead of over the whole document 2024-06-05 17:30:07 +02:00
Clément Renault
2af7e4dbe9 Rename the embeddings workloads 2024-06-05 17:30:07 +02:00
Clément Renault
a998b881f6 Cache a lot of operations to know if a field must be indexed 2024-06-05 17:30:07 +02:00
Clément Renault
b81953a65d Add a span for the prepare_for_documents_reindexing 2024-06-05 17:30:07 +02:00
Clément Renault
091bb157f1 Add a span for the settings diff creation 2024-06-05 17:30:07 +02:00
Clément Renault
1b639ce44b Reduce the number of complex calls to settings diff functions 2024-06-05 17:30:07 +02:00
Clément Renault
87cf8a3c94 Introduce a new way to determine the operations to perform on the fields 2024-06-05 17:30:07 +02:00
Clément Renault
0f578348f1 Introduce a dedicated function to write proximity entries in database 2024-06-05 17:30:07 +02:00
Clément Renault
fad4675abe Give the settings diff to the write_typed_chunk_into_index function 2024-06-05 17:30:07 +02:00
Clément Renault
1ab03c4ede Fix an issue with settings diff and * in the searchable attributes 2024-06-05 17:30:07 +02:00
Clément Renault
0c6e4b2f00 Introducing a new into_del_add_obkv_conditional_operation function 2024-06-05 17:30:07 +02:00
Clément Renault
42b3f52ef9 Introduce the SettingDiff only_additional_fields method 2024-06-05 17:30:07 +02:00
meili-bors[bot]
98e062a714 Merge #4675
4675: Update actix-web 4.5.1 -> 4.6.0 r=dureuill a=dureuill

# Pull Request

- actix-web 4.5.1 -> 4.6.0
- actix-http 3.6.0 -> 3.7.0
- actix-web-static-files (commit 2d3b6160) -> 4.0.1
- tracing-actix-web 0.7.9 -> 0.7.10
- brotli 3.4.0 -> 6.0.0

## Related issue
Fixes #4625 


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-05 07:40:35 +00:00
Louis Dureuil
8412665957 Update actix-web 4.5.1 -> 4.6.0 2024-06-04 09:54:30 +02:00
meili-bors[bot]
fc584f1db3 Merge #4666
4666: Add a score threshold search parameter r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4609

## What does this PR do?
- See [usage](https://meilisearch.notion.site/Filter-by-score-usage-224a183ce7b24ca99b6a9a8da755668a?pvs=25#95b76ded400342ba9ab3d67c734836f0) and [the known limitation](https://meilisearch.notion.site/Filter-by-score-usage-224a183ce7b24ca99b6a9a8da755668a?pvs=25#e4e32195bf0e4195b5daecdbb7a97a17)
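As a quick illustration in the test style used later in this diff (the query and threshold values are illustrative; `index`, `search_post`, and `snapshot!` are the test-suite helpers), hits whose `_rankingScore` falls below the threshold are dropped from the results:

```rust
let (response, code) = index
    .search_post(json!({
        "q": "Badman dark returns 1",
        "showRankingScore": true,
        // Hits with `_rankingScore` below 0.5 are filtered out.
        "rankingScoreThreshold": 0.5
    }))
    .await;
snapshot!(code, @"200 OK");
```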


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-03 08:42:44 +00:00
Louis Dureuil
2b6db6541e Changes after review 2024-06-03 10:30:00 +02:00
Louis Dureuil
c2fb7afe59 fmt 2024-05-30 12:06:46 +02:00
Louis Dureuil
41976b82b1 Tests for ranking_score_threshold 2024-05-30 11:22:26 +02:00
Louis Dureuil
c36410fcbf Analytics for ranking score threshold 2024-05-30 11:22:12 +02:00
Louis Dureuil
7ce2691374 Add ranking score threshold to similar API 2024-05-30 11:21:31 +02:00
Louis Dureuil
4f03b0cf5b Add ranking score threshold to similar 2024-05-30 11:20:50 +02:00
Louis Dureuil
c26db7878c Expose rankingScoreThreshold in API 2024-05-30 10:32:35 +02:00
Louis Dureuil
aac1d769a7 Add ranking_score_threshold to milli 2024-05-29 14:17:09 +02:00
44 changed files with 2033 additions and 431 deletions

Cargo.lock (generated, 304 lines changed)
View File

@@ -36,9 +36,9 @@ dependencies = [
[[package]]
name = "actix-http"
version = "3.6.0"
version = "3.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743"
checksum = "4eb9843d84c775696c37d9a418bbb01b932629d01870722c0f13eb3f95e2536d"
dependencies = [
"actix-codec",
"actix-rt",
@@ -46,7 +46,7 @@ dependencies = [
"actix-tls",
"actix-utils",
"ahash",
"base64 0.21.7",
"base64 0.22.1",
"bitflags 2.5.0",
"brotli",
"bytes",
@@ -85,13 +85,15 @@ dependencies = [
[[package]]
name = "actix-router"
version = "0.5.1"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d66ff4d247d2b160861fa2866457e85706833527840e4133f8f49aa423a38799"
checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8"
dependencies = [
"bytestring",
"cfg-if",
"http 0.2.11",
"regex",
"regex-lite",
"serde",
"tracing",
]
@@ -138,9 +140,9 @@ dependencies = [
[[package]]
name = "actix-tls"
version = "3.3.0"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f"
checksum = "ac453898d866cdbecdbc2334fe1738c747b4eba14a677261f2b768ba05329389"
dependencies = [
"actix-rt",
"actix-service",
@@ -167,9 +169,9 @@ dependencies = [
[[package]]
name = "actix-web"
version = "4.5.1"
version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984"
checksum = "b1cf67dadb19d7c95e5a299e2dda24193b89d5d4f33a3b9800888ede9e19aa32"
dependencies = [
"actix-codec",
"actix-http",
@@ -196,7 +198,7 @@ dependencies = [
"mime",
"once_cell",
"pin-project-lite",
"regex",
"regex-lite",
"serde",
"serde_json",
"serde_urlencoded",
@@ -220,8 +222,9 @@ dependencies = [
[[package]]
name = "actix-web-static-files"
version = "3.0.5"
source = "git+https://github.com/kilork/actix-web-static-files.git?rev=2d3b6160#2d3b6160f0de4ba061c5d76b5704f34fb677f6df"
version = "4.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adf6d1ef6d7a60e084f9e0595e2a5234abda14e76c105ecf8e2d0e8800c41a1f"
dependencies = [
"actix-web",
"derive_more",
@@ -613,9 +616,9 @@ dependencies = [
[[package]]
name = "brotli"
version = "3.4.0"
version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f"
checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
@@ -624,9 +627,9 @@ dependencies = [
[[package]]
name = "brotli-decompressor"
version = "2.5.1"
version = "4.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
@@ -895,9 +898,9 @@ dependencies = [
[[package]]
name = "charabia"
version = "0.8.10"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4"
checksum = "11a09ae38cfcc153f01576c3f579dfd916e0320f1b474f298c8d680b2dd92eb6"
dependencies = [
"aho-corasick",
"cow-utils",
@@ -986,7 +989,7 @@ dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
"strsim 0.10.0",
]
[[package]]
@@ -1277,12 +1280,12 @@ dependencies = [
[[package]]
name = "darling"
version = "0.20.3"
version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e"
checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1"
dependencies = [
"darling_core 0.20.3",
"darling_macro 0.20.3",
"darling_core 0.20.9",
"darling_macro 0.20.9",
]
[[package]]
@@ -1295,21 +1298,21 @@ dependencies = [
"ident_case",
"proc-macro2",
"quote",
"strsim",
"strsim 0.10.0",
"syn 1.0.109",
]
[[package]]
name = "darling_core"
version = "0.20.3"
version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621"
checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"strsim 0.11.1",
"syn 2.0.60",
]
@@ -1326,11 +1329,11 @@ dependencies = [
[[package]]
name = "darling_macro"
version = "0.20.3"
version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178"
dependencies = [
"darling_core 0.20.3",
"darling_core 0.20.9",
"quote",
"syn 2.0.60",
]
@@ -1383,6 +1386,15 @@ dependencies = [
"derive_builder_macro 0.13.1",
]
[[package]]
name = "derive_builder"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
dependencies = [
"derive_builder_macro 0.20.0",
]
[[package]]
name = "derive_builder_core"
version = "0.12.0"
@@ -1407,6 +1419,18 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "derive_builder_core"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
dependencies = [
"darling 0.20.9",
"proc-macro2",
"quote",
"syn 2.0.60",
]
[[package]]
name = "derive_builder_macro"
version = "0.12.0"
@@ -1427,6 +1451,16 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "derive_builder_macro"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
dependencies = [
"derive_builder_core 0.20.0",
"syn 2.0.60",
]
[[package]]
name = "derive_more"
version = "0.99.17"
@@ -1454,7 +1488,7 @@ dependencies = [
"serde-cs",
"serde_json",
"serde_urlencoded",
"strsim",
"strsim 0.10.0",
]
[[package]]
@@ -1707,29 +1741,6 @@ dependencies = [
"syn 2.0.60",
]
[[package]]
name = "env_filter"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
dependencies = [
"log",
"regex",
]
[[package]]
name = "env_logger"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
dependencies = [
"anstream",
"anstyle",
"env_filter",
"humantime",
"log",
]
[[package]]
name = "equivalent"
version = "1.0.1"
@@ -1784,7 +1795,7 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5"
dependencies = [
"darling 0.20.3",
"darling 0.20.9",
"proc-macro2",
"quote",
"syn 2.0.60",
@@ -2379,12 +2390,6 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "hyper"
version = "0.14.27"
@@ -2778,9 +2783,9 @@ dependencies = [
[[package]]
name = "lindera"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1bbf252ea3490053dc397539ece0b510924f2f72605fa28d3e858d86f43ec88"
checksum = "dcd4fa369654517f72c10b24adf03ad4ce69d19facb79c3cb3cf9b4580ac352f"
dependencies = [
"lindera-analyzer",
"lindera-core",
@@ -2791,9 +2796,9 @@ dependencies = [
[[package]]
name = "lindera-analyzer"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87febfec0e2859ce2154fb90dd6f66b774ddb0b6e264b44f8e3d1303c9dcedd7"
checksum = "c2cba7fe275cb8ec4c594cfee9cc39e48b71e02a089457d52f3e70dc146a8133"
dependencies = [
"anyhow",
"bincode",
@@ -2821,9 +2826,9 @@ dependencies = [
[[package]]
name = "lindera-cc-cedict"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb91bb8a93ab0f95dbc3c43b5105354bb059134ef731154f75a64b5d919e71d"
checksum = "240adf9faba3f09ad16557aefcd316dd00ebb940ac94334a629660d772f118c1"
dependencies = [
"bincode",
"byteorder",
@@ -2835,29 +2840,21 @@ dependencies = [
[[package]]
name = "lindera-cc-cedict-builder"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6022a8309a287dbef425fd09a61585351670c83001d74f6c089979e2330b683"
checksum = "f12241f9e74babe708a0b9441d9f3fa67cb29fd01257918f30ffd480ca568820"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"yada",
"lindera-dictionary-builder",
]
[[package]]
name = "lindera-compress"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32363cbcf433f915e7d77c2a0c410db2d6b23442e80715cf2cf6b9864078a500"
checksum = "50f9f7a858d70ff9e4383cbd507ca9e98c8faf0319e08c10df4c30cb58c9ca6c"
dependencies = [
"anyhow",
"flate2",
@@ -2866,9 +2863,9 @@ dependencies = [
[[package]]
name = "lindera-core"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9a0e858753a02b1a3524fae4fbb11ca4b3a947128fd7854b797386562678be8"
checksum = "7f09810ab98ce2a084d788ac38fbb7b31697f34bc47c61de0d880320a674bd15"
dependencies = [
"anyhow",
"bincode",
@@ -2883,9 +2880,9 @@ dependencies = [
[[package]]
name = "lindera-decompress"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e406345f6f8b665b9a129c67079c18ca9d97e9d171d102b4106a64a592c285e"
checksum = "d53400c9b2dd6b45f82d9fa5b5efe079f3acaf6ce609dba8d42c8a76baaa2b12"
dependencies = [
"anyhow",
"flate2",
@@ -2894,9 +2891,9 @@ dependencies = [
[[package]]
name = "lindera-dictionary"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2a3ec0e5fd6768a27c6ec1040e8470d3a5926418f7afe065859e98aabb3bfe"
checksum = "2053d064a515839250438b8dfa6cf445e2b97633232ded34a54f267e945d196e"
dependencies = [
"anyhow",
"bincode",
@@ -2918,10 +2915,32 @@ dependencies = [
]
[[package]]
name = "lindera-filter"
version = "0.30.0"
name = "lindera-dictionary-builder"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1badaf51bad051185ea4917ba91bbbf2d6f8167e155647e21e0eaaef0982a95d"
checksum = "14f486924055f8bedcc5877572e4dc91fbc10370862430ac2e5f7f0d671a18c8"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"derive_builder 0.20.0",
"encoding",
"encoding_rs",
"encoding_rs_io",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"yada",
]
[[package]]
name = "lindera-filter"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb3904fc279f0297f6fd6210435adab1f8c82ba84eba8635407c791af51c0d8a"
dependencies = [
"anyhow",
"csv",
@@ -2944,9 +2963,9 @@ dependencies = [
[[package]]
name = "lindera-ipadic"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "129ec16366354998f9791467ad38731539197747f649e573ead845358271ce25"
checksum = "4aa3ef2f1f6838b0fa2e2fca2896242bb83bc877c1760cdb6fa23449ab95d664"
dependencies = [
"bincode",
"byteorder",
@@ -2958,31 +2977,21 @@ dependencies = [
[[package]]
name = "lindera-ipadic-builder"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f0979a56bc57e9c9be2996dff232c47aa146a2e7baebf5dd567e388eba3dd90"
checksum = "a41287db18eadb58d73a04d49778d41c161549fbbbe155d4338976b7b8541c7d"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"serde",
"yada",
"lindera-dictionary-builder",
]
[[package]]
name = "lindera-ipadic-neologd"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20076660c4e79ef0316735b44e18ec7644e54786acdee8946c972d5f97086d0f"
checksum = "49382256f245078400bf7e72663f9eb30afcd9ed54cd46f29d7db1be529678e1"
dependencies = [
"bincode",
"byteorder",
@@ -2994,31 +3003,21 @@ dependencies = [
[[package]]
name = "lindera-ipadic-neologd-builder"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eccd18ed5f65d1d64ac0cbfa1d6827bfbbaf6530520ae6847e6a91ee38f47e20"
checksum = "5ae9cfd2fda68ef526ef0c7b50c5d4d5582a4daa6ecd0cea9e2b0b62564a2a5d"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"serde",
"yada",
"lindera-dictionary-builder",
]
[[package]]
name = "lindera-ko-dic"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59073171566c3e498ca048e84c2d0a7e117a42f36c8eb7d7163e65ac38bd6d48"
checksum = "7f86d03a863f3ae1d269e7b7d4dd2cce9385a53463479bafc5d7aa48719f36db"
dependencies = [
"bincode",
"byteorder",
@@ -3034,29 +3033,21 @@ dependencies = [
[[package]]
name = "lindera-ko-dic-builder"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae176afa8535ca2a5ee9471873f85d531db0a6c32a3c42b41084506aac22b577"
checksum = "bd0f44f2e56358c5879dfb5e7f76cc6ba7853ec31082c4e3f8fb65fb2d849c51"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"yada",
"lindera-dictionary-builder",
]
[[package]]
name = "lindera-tokenizer"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "457285bdde84571aa510c9e05371904305a55e8a541fa1473d4393062f06932d"
checksum = "7c5182735cdc2832ac757b31e8a5b150a3514357a30efe3dec212f8dcb06ba14"
dependencies = [
"bincode",
"lindera-core",
@@ -3068,9 +3059,9 @@ dependencies = [
[[package]]
name = "lindera-unidic"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5839980be552dfa639b70964c61914a9ad014148663679b0e148aa72e5e30f23"
checksum = "6c63da104728dd1cf14bfa564753cbfa996f6078ed2e23e31475bd1d639fc597"
dependencies = [
"bincode",
"byteorder",
@@ -3086,22 +3077,14 @@ dependencies = [
[[package]]
name = "lindera-unidic-builder"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcaab8f061d5b944b1e424f49c7efbf8f276e8a72e4f4ff956d01e46d481f008"
checksum = "04acecbc068dac21766a1b7ed1f2608b6f250d10b4f8bff67abc2a00437a0974"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"yada",
"lindera-dictionary-builder",
]
[[package]]
@@ -4340,6 +4323,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-lite"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"
[[package]]
name = "regex-syntax"
version = "0.8.2"
@@ -4388,12 +4377,6 @@ dependencies = [
"winreg",
]
[[package]]
name = "retain_mut"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"
[[package]]
name = "ring"
version = "0.17.8"
@@ -4411,13 +4394,12 @@ dependencies = [
[[package]]
name = "roaring"
version = "0.10.2"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873"
checksum = "7699249cc2c7d71939f30868f47e9d7add0bdc030d90ee10bfd16887ff8bb1c8"
dependencies = [
"bytemuck",
"byteorder",
"retain_mut",
"serde",
]
@@ -4900,6 +4882,12 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "strum"
version = "0.26.2"
@@ -5313,9 +5301,9 @@ dependencies = [
[[package]]
name = "tracing-actix-web"
version = "0.7.9"
version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78"
checksum = "fa069bd1503dd526ee793bb3fce408895136c95fc86d2edb2acf1c646d7f0684"
dependencies = [
"actix-web",
"mutually_exclusive_features",

View File

@@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.5.1", default-features = false }
actix-web = { version = "4.6.0", default-features = false }
anyhow = "1.0.79"
convert_case = "0.6.0"
csv = "1.3.0"
@@ -30,7 +30,12 @@ serde_json = "1.0.111"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] }

View File

@@ -189,4 +189,6 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarId);

View File

@@ -241,6 +241,8 @@ InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
InvalidSimilarRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
@@ -505,6 +507,21 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
}
}
impl fmt::Display for deserr_codes::InvalidSearchRankingScoreThreshold {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`."
)
}
}
impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
deserr_codes::InvalidSearchRankingScoreThreshold.fmt(f)
}
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {

View File

@@ -14,20 +14,20 @@ default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.6.0", default-features = false, features = [
actix-http = { version = "3.7.0", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_21",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.5.1", default-features = false, features = [
actix-web = { version = "4.6.0", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_21",
] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
actix-web-static-files = { version = "4.0.1", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.77"
@@ -105,13 +105,13 @@ url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
tracing-actix-web = "0.7.10"
build-info = { version = "1.7.0", path = "../build-info" }
[dev-dependencies]
actix-rt = "2.9.0"
assert-json-diff = "2.0.2"
brotli = "3.4.0"
brotli = "6.0.0"
insta = "1.34.0"
manifest-dir-macros = "0.1.18"
maplit = "1.0.2"

View File

@@ -648,6 +648,7 @@ pub struct SearchAggregator {
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
}
impl SearchAggregator {
@@ -676,6 +677,7 @@ impl SearchAggregator {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@@ -748,6 +750,7 @@ impl SearchAggregator {
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
@@ -821,6 +824,7 @@ impl SearchAggregator {
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
} = other;
if self.timestamp.is_none() {
@@ -904,6 +908,7 @@ impl SearchAggregator {
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -945,6 +950,7 @@ impl SearchAggregator {
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
} = self;
if total_received == 0 {
@@ -1015,6 +1021,7 @@ impl SearchAggregator {
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
});
@@ -1087,6 +1094,7 @@ impl MultiSearchAggregator {
matching_strategy: _,
attributes_to_search_on: _,
hybrid: _,
ranking_score_threshold: _,
} = query;
index_uid.as_str()
@@ -1234,6 +1242,7 @@ impl FacetSearchAggregator {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@@ -1248,7 +1257,8 @@ impl FacetSearchAggregator {
|| filter.is_some()
|| *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some()
|| hybrid.is_some();
|| hybrid.is_some()
|| ranking_score_threshold.is_some();
ret
}
@@ -1624,6 +1634,7 @@ pub struct SimilarAggregator {
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
}
impl SimilarAggregator {
@@ -1638,6 +1649,7 @@ impl SimilarAggregator {
show_ranking_score,
show_ranking_score_details,
filter,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@@ -1675,6 +1687,7 @@ impl SimilarAggregator {
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
ret.embedder = embedder.is_some();
@@ -1708,6 +1721,7 @@ impl SimilarAggregator {
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
} = other;
if self.timestamp.is_none() {
@@ -1749,6 +1763,7 @@ impl SimilarAggregator {
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -1769,6 +1784,7 @@ impl SimilarAggregator {
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
} = self;
if total_received == 0 {
@@ -1808,6 +1824,7 @@ impl SimilarAggregator {
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
});

View File

@@ -14,8 +14,8 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::search_queue::SearchQueue;
@@ -46,6 +46,8 @@ pub struct FacetSearchQuery {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
pub async fn search(
@@ -103,6 +105,7 @@ impl From<FacetSearchQuery> for SearchQuery {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = value;
SearchQuery {
@@ -128,6 +131,7 @@ impl From<FacetSearchQuery> for SearchQuery {
vector,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
}
}
}

View File

@@ -19,9 +19,10 @@ use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery,
SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;
@@ -82,6 +83,21 @@ pub struct SearchQueryGet {
pub hybrid_embedder: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThreshold);
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
type Error = InvalidSearchRankingScoreThreshold;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?;
Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?))
}
}
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
@@ -152,6 +168,7 @@ impl From<SearchQueryGet> for SearchQuery {
matching_strategy: other.matching_strategy,
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
}
}
}

View File

@@ -6,8 +6,8 @@ use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::{
InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId,
InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarShowRankingScore,
InvalidSimilarShowRankingScoreDetails,
InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarRankingScoreThreshold,
InvalidSimilarShowRankingScore, InvalidSimilarShowRankingScoreDetails,
};
use meilisearch_types::error::{ErrorCode as _, ResponseError};
use meilisearch_types::index_uid::IndexUid;
@@ -21,8 +21,8 @@ use crate::analytics::{Analytics, SimilarAggregator};
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
add_search_rules, perform_similar, SearchKind, SimilarQuery, SimilarResult,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
add_search_rules, perform_similar, RankingScoreThresholdSimilar, SearchKind, SimilarQuery,
SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -42,9 +42,7 @@ pub async fn similar_get(
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.0.try_into().map_err(|code: InvalidSimilarId| {
ResponseError::from_msg(code.to_string(), code.error_code())
})?;
let query = params.0.try_into()?;
let mut aggregate = SimilarAggregator::from_query(&query, &req);
@@ -130,12 +128,27 @@ pub struct SimilarQueryGet {
show_ranking_score: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
type Error = InvalidSimilarRankingScoreThreshold;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?))
}
}
impl TryFrom<SimilarQueryGet> for SimilarQuery {
type Error = InvalidSimilarId;
type Error = ResponseError;
fn try_from(
SimilarQueryGet {
@@ -147,6 +160,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
}: SimilarQueryGet,
) -> Result<Self, Self::Error> {
let filter = match filter {
@@ -158,7 +172,9 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
};
Ok(SimilarQuery {
id: id.0.try_into()?,
id: id.0.try_into().map_err(|code: InvalidSimilarId| {
ResponseError::from_msg(code.to_string(), code.error_code())
})?,
offset: offset.0,
limit: limit.0,
filter,
@@ -166,6 +182,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
show_ranking_score: show_ranking_score.0,
show_ranking_score_details: show_ranking_score_details.0,
ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
})
}
}

View File

@@ -87,6 +87,44 @@ pub struct SearchQuery {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThreshold(f64);
impl std::convert::TryFrom<f64> for RankingScoreThreshold {
type Error = InvalidSearchRankingScoreThreshold;
fn try_from(f: f64) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`, which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSearchRankingScoreThreshold)
} else {
Ok(RankingScoreThreshold(f))
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdSimilar(f64);
impl std::convert::TryFrom<f64> for RankingScoreThresholdSimilar {
type Error = InvalidSimilarRankingScoreThreshold;
fn try_from(f: f64) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`, which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSimilarRankingScoreThreshold)
} else {
Ok(Self(f))
}
}
}
// Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum.
@@ -117,6 +155,7 @@ impl fmt::Debug for SearchQuery {
crop_marker,
matching_strategy,
attributes_to_search_on,
ranking_score_threshold,
} = self;
let mut debug = f.debug_struct("SearchQuery");
@@ -188,6 +227,9 @@ impl fmt::Debug for SearchQuery {
debug.field("highlight_pre_tag", &highlight_pre_tag);
debug.field("highlight_post_tag", &highlight_post_tag);
debug.field("crop_marker", &crop_marker);
if let Some(ranking_score_threshold) = ranking_score_threshold {
debug.field("ranking_score_threshold", &ranking_score_threshold);
}
debug.finish()
}
@@ -356,6 +398,8 @@ pub struct SearchQueryWithIndex {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
impl SearchQueryWithIndex {
@@ -384,6 +428,7 @@ impl SearchQueryWithIndex {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = self;
(
index_uid,
@@ -410,6 +455,7 @@ impl SearchQueryWithIndex {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@@ -436,6 +482,8 @@ pub struct SimilarQuery {
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
}
#[derive(Debug, Clone, PartialEq, Deserr)]
@@ -664,6 +712,9 @@ fn prepare_search<'t>(
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn);
search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
search.ranking_score_threshold(ranking_score_threshold.0);
}
match search_kind {
SearchKind::KeywordOnly => {
@@ -705,11 +756,16 @@ fn prepare_search<'t>(
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
});
search.scoring_strategy(
if query.show_ranking_score
|| query.show_ranking_score_details
|| query.ranking_score_threshold.is_some()
{
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
},
);
// compute the offset on the limit depending on the pagination mode.
let (offset, limit) = if is_finite_pagination {
@@ -787,10 +843,6 @@ pub fn perform_search(
let SearchQuery {
q,
vector: _,
hybrid: _,
// already computed from prepare_search
offset: _,
limit,
page,
hits_per_page,
@@ -801,14 +853,19 @@ pub fn perform_search(
show_matches_position,
show_ranking_score,
show_ranking_score_details,
filter: _,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
// already used in prepare_search
vector: _,
hybrid: _,
offset: _,
ranking_score_threshold: _,
matching_strategy: _,
attributes_to_search_on: _,
filter: _,
} = query;
let format = AttributesFormat {
@@ -1070,6 +1127,7 @@ pub fn perform_similar(
attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
ranking_score_threshold,
} = query;
// using let-else rather than `?` so that the borrow checker identifies we're always returning here,
@@ -1093,6 +1151,10 @@ pub fn perform_similar(
}
}
if let Some(ranking_score_threshold) = ranking_score_threshold {
similar.ranking_score_threshold(ranking_score_threshold.0);
}
let milli::SearchResult {
documents_ids,
matching_words: _,

View File

@@ -40,8 +40,9 @@ pub struct Permit {
impl Drop for Permit {
fn drop(&mut self) {
let sender = self.sender.clone();
// if the channel is closed then the whole instance is down
let _ = futures::executor::block_on(self.sender.send(()));
std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
}
}

View File

@@ -321,6 +321,40 @@ async fn search_bad_facets() {
// Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings.
}
#[actix_rt::test]
async fn search_bad_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"rankingScoreThreshold": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found a string: `\"doggo\"`",
"code": "invalid_search_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn search_invalid_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_search_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn search_non_filterable_facets() {
let server = Server::new().await;

View File

@@ -48,6 +48,31 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
])
});
static SCORE_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
},
{
"title": "Batman Returns",
"id": "C",
},
{
"title": "Batman",
"id": "D",
},
{
"title": "Badman",
"id": "E",
}
])
});
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
@@ -960,6 +985,213 @@ async fn test_score_details() {
.await;
}
#[actix_rt::test]
async fn test_score() {
let server = Server::new().await;
let index = server.index("test");
let documents = SCORE_DOCUMENTS.clone();
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index
.search(
json!({
"q": "Badman the dark knight returns 1",
"showRankingScore": true,
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.9746605609456898
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.8055252965383685
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.16666666666666666
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.07702020202020202
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.07702020202020202
}
]
"###);
},
)
.await;
}
#[actix_rt::test]
async fn test_score_threshold() {
let query = "Badman dark returns 1";
let server = Server::new().await;
let index = server.index("test");
let documents = SCORE_DOCUMENTS.clone();
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"5");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.11553030303030302
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.11553030303030302
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.2
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"3"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.5
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"2"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.8
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"1"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 1.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"0"###);
// nobody is perfect
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
},
)
.await;
}
#[actix_rt::test]
async fn test_degraded_score_details() {
let server = Server::new().await;

View File

@@ -87,6 +87,68 @@ async fn similar_bad_id() {
"###);
}
#[actix_rt::test]
async fn similar_bad_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found an array: `[\"doggo\"]`",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn similar_invalid_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn similar_invalid_id() {
let server = Server::new().await;

View File

@@ -194,6 +194,235 @@ async fn basic() {
.await;
}
#[actix_rt::test]
async fn ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": [
0.7,
0.7,
-0.4
]
},
"_rankingScore": 0.2819308042526245
},
{
"title": "Shazam!",
"release_year": 2019,
"id": "287947",
"_vectors": {
"manual": [
0.8,
0.4,
-0.5
]
},
"_rankingScore": 0.1662663221359253
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": [
0.7,
0.7,
-0.4
]
},
"_rankingScore": 0.2819308042526245
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @"[]");
},
)
.await;
}
#[actix_rt::test]
async fn filter() {
let server = Server::new().await;

View File

@@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.8.10", default-features = false }
charabia = { version = "0.8.11", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11"
deserr = "0.6.1"

View File

@@ -66,6 +66,7 @@ fn main() -> Result<(), Box<dyn Error>> {
&mut DefaultSearchLogger,
logger,
TimeBudget::max(),
None,
)?;
if let Some((logger, dir)) = detailed_logger {
logger.finish(&mut ctx, Path::new(dir))?;

View File

@@ -47,6 +47,12 @@ pub struct FacetGroupValue {
pub bitmap: RoaringBitmap,
}
#[derive(Debug)]
pub struct FacetGroupLazyValue<'b> {
pub size: u8,
pub bitmap_bytes: &'b [u8],
}
pub struct FacetGroupKeyCodec<T> {
_phantom: PhantomData<T>,
}
@@ -69,6 +75,7 @@ where
Ok(Cow::Owned(v))
}
}
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
where
T: BytesDecode<'a>,
@@ -84,6 +91,7 @@ where
}
pub struct FacetGroupValueCodec;
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
type EItem = FacetGroupValue;
@@ -93,11 +101,23 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
Ok(Cow::Owned(v))
}
}
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
type DItem = FacetGroupValue;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let size = bytes[0];
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
Ok(FacetGroupValue { size, bitmap })
}
}
pub struct FacetGroupLazyValueCodec;
impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec {
type DItem = FacetGroupLazyValue<'a>;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] })
}
}
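
`FacetGroupLazyValue` is the trick behind the cheaper AND path: decoding only splits the raw LMDB value into the group size and the still-serialized bitmap bytes, deferring the expensive bitmap work until we know which subset we need. A minimal sketch of how a caller can exploit it, assuming the codecs above (`facet_docids_in_universe` and its bindings are hypothetical):

```rust
// Sketch only, assuming the codecs above; `key` comes from the caller.
fn facet_docids_in_universe(
    rtxn: &heed::RoTxn<'_>,
    db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
    key: &FacetGroupKey<&[u8]>,
    universe: Option<&RoaringBitmap>,
) -> crate::Result<Option<RoaringBitmap>> {
    let Some(lazy) = db.get(rtxn, key)? else { return Ok(None) };
    // Only now do we pay for the bitmap, and only for the requested subset.
    let docids = match universe {
        Some(universe) => {
            CboRoaringBitmapCodec::intersection_with_serialized(lazy.bitmap_bytes, universe)?
        }
        None => CboRoaringBitmapCodec::deserialize_from(lazy.bitmap_bytes)?,
    };
    Ok(Some(docids))
}
```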

View File

@@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::io;
use std::io::{self, Cursor};
use std::mem::size_of;
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
@@ -57,6 +57,24 @@ impl CboRoaringBitmapCodec {
}
}
pub fn intersection_with_serialized(
mut bytes: &[u8],
other: &RoaringBitmap,
) -> io::Result<RoaringBitmap> {
// See the `deserialize_from` method above for implementation details.
if bytes.len() <= THRESHOLD * size_of::<u32>() {
let mut bitmap = RoaringBitmap::new();
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
if other.contains(integer) {
bitmap.insert(integer);
}
}
Ok(bitmap)
} else {
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
}
}
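
The two branches mirror the on-disk format: bitmaps with at most `THRESHOLD` entries are stored as a raw list of native-endian `u32`s and are filtered integer by integer, while larger ones use roaring's new primitive that intersects directly against the serialized container data. A hedged usage sketch (`shared_docids` is illustrative, not part of the codec):

```rust
use std::io;

use roaring::RoaringBitmap;

// Intersect a Cbo-serialized bitmap with a universe without materializing
// the whole bitmap first.
fn shared_docids(bitmap: &RoaringBitmap, universe: &RoaringBitmap) -> io::Result<RoaringBitmap> {
    let mut buffer = Vec::new();
    CboRoaringBitmapCodec::serialize_into(bitmap, &mut buffer);
    CboRoaringBitmapCodec::intersection_with_serialized(&buffer, universe)
}
```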
/// Merge serialized CboRoaringBitmaps in a buffer.
///
/// if the merged values length is under the threshold, values are directly

View File

@@ -38,7 +38,7 @@ where
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
Ok(())
} else {
@@ -81,7 +81,7 @@ where
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
// We first fill the heap with values from the highest level
let starting_key =
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };

View File

@@ -4,9 +4,11 @@ use heed::BytesEncode;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::Result;
use crate::{CboRoaringBitmapCodec, Result};
/// Find all the document ids for which the given field contains a value contained within
/// the two bounds.
@@ -16,6 +18,7 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
field_id: u16,
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
universe: Option<&RoaringBitmap>,
docids: &mut RoaringBitmap,
) -> Result<()>
where
@@ -46,13 +49,15 @@ where
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(starting_left_bound) =
get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?
{
let rightmost_bound =
Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
let group_size = usize::MAX;
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
Ok(())
@@ -64,12 +69,16 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
field_id: u16,
left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>,
/// The subset of document ids that are useful for this search.
/// Great performance gains can be achieved by fetching only the values that match this subset.
universe: Option<&'bitmap RoaringBitmap>,
docids: &'bitmap mut RoaringBitmap,
}
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
let left_key =
@@ -104,7 +113,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
}
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
*self.docids |= value.bitmap;
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
};
}
}
Ok(())
@@ -195,7 +210,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
left_condition && right_condition
};
if should_take_whole_group {
*self.docids |= &previous_value.bitmap;
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
previous_key = next_key;
previous_value = next_value;
continue;
@@ -291,7 +312,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
left_condition && right_condition
};
if should_take_whole_group {
*self.docids |= &previous_value.bitmap;
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
} else {
let level = level - 1;
let starting_left_bound = previous_key.left_bound;
@@ -365,6 +392,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -384,6 +412,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -418,6 +447,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -439,6 +469,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -474,6 +505,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -499,6 +531,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -537,6 +570,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -556,6 +590,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -571,6 +606,7 @@ mod tests {
0,
&Bound::Unbounded,
&Bound::Unbounded,
None,
&mut docids,
)
.unwrap();
@@ -586,6 +622,7 @@ mod tests {
1,
&Bound::Unbounded,
&Bound::Unbounded,
None,
&mut docids,
)
.unwrap();
@@ -621,6 +658,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -634,6 +672,7 @@ mod tests {
1,
&start,
&end,
None,
&mut docids,
)
.unwrap();

View File

@@ -36,7 +36,7 @@ pub fn ascending_facet_sort<'t>(
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);

View File

@@ -19,9 +19,9 @@ pub fn descending_facet_sort<'t>(
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(itertools::Either::Left(DescendingFacetSort {

View File

@@ -4,7 +4,7 @@ use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
use roaring::RoaringBitmap;
use roaring::{MultiOps, RoaringBitmap};
use serde_json::Value;
use super::facet_range_search;
@@ -224,14 +224,14 @@ impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let filterable_fields = index.filterable_fields(rtxn)?;
self.inner_evaluate(rtxn, index, &filterable_fields)
self.inner_evaluate(rtxn, index, &filterable_fields, None)
}
fn evaluate_operator(
rtxn: &heed::RoTxn,
index: &Index,
field_id: FieldId,
universe: Option<&RoaringBitmap>,
operator: &Condition<'a>,
) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
@@ -291,14 +291,22 @@ impl<'a> Filter<'a> {
}
Condition::NotEqual(val) => {
let operator = Condition::Equal(val.clone());
let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?;
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
let all_ids = index.documents_ids(rtxn)?;
return Ok(all_ids - docids);
}
};
let mut output = RoaringBitmap::new();
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?;
Self::explore_facet_number_levels(
rtxn,
numbers_db,
field_id,
left,
right,
universe,
&mut output,
)?;
Ok(output)
}
@@ -310,6 +318,7 @@ impl<'a> Filter<'a> {
field_id: FieldId,
left: Bound<f64>,
right: Bound<f64>,
universe: Option<&RoaringBitmap>,
output: &mut RoaringBitmap,
) -> Result<()> {
match (left, right) {
@@ -321,7 +330,7 @@ impl<'a> Filter<'a> {
(_, _) => (),
}
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
rtxn, db, field_id, &left, &right, output,
rtxn, db, field_id, &left, &right, universe, output,
)?;
Ok(())
@@ -332,31 +341,37 @@ impl<'a> Filter<'a> {
rtxn: &heed::RoTxn,
index: &Index,
filterable_fields: &HashSet<String>,
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
if universe.map_or(false, |u| u.is_empty()) {
return Ok(RoaringBitmap::new());
}
match &self.condition {
FilterCondition::Not(f) => {
let all_ids = index.documents_ids(rtxn)?;
let selected = Self::inner_evaluate(
&(f.as_ref().clone()).into(),
rtxn,
index,
filterable_fields,
universe,
)?;
Ok(all_ids - selected)
match universe {
Some(universe) => Ok(universe - selected),
None => {
let all_ids = index.documents_ids(rtxn)?;
Ok(all_ids - selected)
}
}
}
FilterCondition::In { fid, els } => {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
let mut bitmap = RoaringBitmap::new();
for el in els {
let op = Condition::Equal(el.clone());
let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?;
bitmap |= el_bitmap;
}
Ok(bitmap)
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
.union()
} else {
Ok(RoaringBitmap::new())
}
@@ -371,7 +386,7 @@ impl<'a> Filter<'a> {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
Self::evaluate_operator(rtxn, index, fid, op)
Self::evaluate_operator(rtxn, index, fid, universe, op)
} else {
Ok(RoaringBitmap::new())
}
@@ -382,14 +397,11 @@ impl<'a> Filter<'a> {
}))?
}
}
FilterCondition::Or(subfilters) => {
let mut bitmap = RoaringBitmap::new();
for f in subfilters {
bitmap |=
Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
}
Ok(bitmap)
}
FilterCondition::Or(subfilters) => subfilters
.iter()
.cloned()
.map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe))
.union(),
FilterCondition::And(subfilters) => {
let mut subfilters_iter = subfilters.iter();
if let Some(first_subfilter) = subfilters_iter.next() {
@@ -398,16 +410,21 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
for f in subfilters_iter {
if bitmap.is_empty() {
return Ok(bitmap);
}
// TODO We are doing the intersections two times,
// it could be more efficient
// Can't I just replace this `&=` by an `=`?
bitmap &= Self::inner_evaluate(
&(f.clone()).into(),
rtxn,
index,
filterable_fields,
Some(&bitmap),
)?;
}
Ok(bitmap)
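
This is where the speedup described in the PR comes from: every AND clause after the first is evaluated against the intersection accumulated so far, so later clauses only materialize document ids that can still match, and the rewritten `Not` arm subtracts from that same universe instead of from all document ids. A toy illustration of the shrinking-universe principle, with hypothetical clause bitmaps:

```rust
use roaring::RoaringBitmap;

fn main() {
    // Hypothetical posting lists for three AND clauses.
    let clause_a: RoaringBitmap = (0u32..1_000_000).collect();
    let clause_b: RoaringBitmap = (0u32..1_000).collect();
    let clause_c: RoaringBitmap = (500u32..2_000).collect();

    // With a shrinking universe, the third clause is resolved against the
    // 1_000 ids left after the second one, not against the whole index.
    let mut universe = clause_a;
    universe &= clause_b;
    universe &= clause_c;
    assert_eq!(universe.len(), 500);
}
```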
@@ -507,6 +524,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
let geo_lng_token = Token::new(
@@ -539,6 +557,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
let condition_right = FilterCondition::Condition {
@@ -552,6 +571,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
left | right
@@ -567,6 +587,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?
};

View File

@@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter};
pub use self::search::{FacetValueHit, SearchForFacetValues};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result};
@@ -54,9 +54,9 @@ pub fn facet_max_value<'t>(
}
/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@@ -78,9 +78,9 @@ where
}
/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@@ -102,9 +102,9 @@ where
}
/// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t>(
pub(crate) fn get_highest_level<'t, DC>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes();
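
These helpers are now generic over the value codec `DC`, so the same functions serve both the eager `FacetGroupValueCodec` and the lazy codec introduced above. A sketch of the two call styles seen at the call sites (`first_and_last` is illustrative):

```rust
// The decode codec is carried by the database handle, so both the
// explicit and the inferred form compile.
fn first_and_last<'t>(
    rtxn: &'t heed::RoTxn<'t>,
    db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
    field_id: u16,
) -> heed::Result<(Option<&'t [u8]>, Option<&'t [u8]>)> {
    let first =
        get_first_facet_value::<BytesRefCodec, FacetGroupLazyValueCodec>(rtxn, db, field_id)?;
    let last = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?;
    Ok((first, last))
}
```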

View File

@@ -169,6 +169,7 @@ impl<'a> Search<'a> {
index: self.index,
semantic: self.semantic.clone(),
time_budget: self.time_budget.clone(),
ranking_score_threshold: self.ranking_score_threshold,
};
let semantic = search.semantic.take();

View File

@@ -50,6 +50,7 @@ pub struct Search<'a> {
index: &'a Index,
semantic: Option<SemanticSearch>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
}
impl<'a> Search<'a> {
@@ -70,6 +71,7 @@ impl<'a> Search<'a> {
index,
semantic: None,
time_budget: TimeBudget::max(),
ranking_score_threshold: None,
}
}
@@ -146,6 +148,11 @@ impl<'a> Search<'a> {
self
}
pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Search<'a> {
self.ranking_score_threshold = Some(ranking_score_threshold);
self
}
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search {
let ctx = SearchContext::new(self.index, self.rtxn)?;
@@ -184,6 +191,7 @@ impl<'a> Search<'a> {
embedder_name,
embedder,
self.time_budget.clone(),
self.ranking_score_threshold,
)?
}
_ => execute_search(
@@ -201,6 +209,7 @@ impl<'a> Search<'a> {
&mut DefaultSearchLogger,
&mut DefaultSearchLogger,
self.time_budget.clone(),
self.ranking_score_threshold,
)?,
};
@@ -239,6 +248,7 @@ impl fmt::Debug for Search<'_> {
index: _,
semantic,
time_budget,
ranking_score_threshold,
} = self;
f.debug_struct("Search")
.field("query", query)
@@ -257,6 +267,7 @@ impl fmt::Debug for Search<'_> {
&semantic.as_ref().map(|semantic| &semantic.embedder_name),
)
.field("time_budget", time_budget)
.field("ranking_score_threshold", ranking_score_threshold)
.finish()
}
}
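
For context, the new setter slots into the existing `Search` builder like any other option. A minimal, hypothetical call sequence (index and transaction setup omitted):

```rust
// Hedged sketch: assumes a `Search` built the usual way.
fn search_with_threshold(rtxn: &heed::RoTxn<'_>, index: &Index) -> Result<SearchResult> {
    let mut search = Search::new(rtxn, index);
    search.query("new york pizza");
    // Hits whose global ranking score falls below 0.5 are dropped and
    // removed from the candidates.
    search.ranking_score_threshold(0.5);
    search.execute()
}
```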

View File

@@ -28,6 +28,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
scoring_strategy: ScoringStrategy,
logger: &mut dyn SearchLogger<Q>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<BucketSortOutput> {
logger.initial_query(query);
logger.ranking_rules(&ranking_rules);
@@ -164,7 +165,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
loop {
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
ranking_rule_scores.push(ScoreDetails::Skipped);
// remove candidates from the universe without adding them to the results if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
maybe_add_to_results!(bucket);
ranking_rule_scores.pop();
if cur_ranking_rule_index == 0 {
@@ -220,6 +233,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
debug_assert!(
ranking_rule_universes[cur_ranking_rule_index].is_superset(&next_bucket.candidates)
);
// remove candidates from the universe without adding them to the results if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -=
next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1
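
Both insertions apply the same pruning rule: after scoring a bucket, compute the global score of the ranking-rule scores accumulated so far, and if it is already below the threshold, evict the bucket together with everything left in the current universe, then backtrack. This is only sound if later ranking rules can never raise the global score, which is what the code relies on. A self-contained sketch of the eviction step (names simplified):

```rust
use roaring::RoaringBitmap;

/// Sketch of the eviction above: once a bucket's partial global score is
/// below the threshold, neither the bucket nor anything left in the
/// current ranking-rule universe can reach it, so both are removed from
/// the candidates before backtracking.
fn evict_below_threshold(
    all_candidates: &mut RoaringBitmap,
    bucket: &RoaringBitmap,
    current_universe: &RoaringBitmap,
    partial_global_score: f64,
    threshold: f64,
) -> bool {
    if partial_global_score < threshold {
        *all_candidates -= bucket | current_universe;
        true // the caller backtracks to the previous ranking rule
    } else {
        false
    }
}
```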

View File

@@ -523,6 +523,7 @@ mod tests {
&mut crate::DefaultSearchLogger,
&mut crate::DefaultSearchLogger,
TimeBudget::max(),
None,
)
.unwrap();

View File

@@ -548,6 +548,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search")]
pub fn filtered_universe(
index: &Index,
txn: &RoTxn<'_>,
@@ -573,6 +574,7 @@ pub fn execute_vector_search(
embedder_name: &str,
embedder: &Embedder,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
@@ -602,6 +604,7 @@ pub fn execute_vector_search(
scoring_strategy,
placeholder_search_logger,
time_budget,
ranking_score_threshold,
)?;
Ok(PartialSearchResult {
@@ -631,6 +634,7 @@ pub fn execute_search(
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
@@ -719,6 +723,7 @@ pub fn execute_search(
scoring_strategy,
query_graph_logger,
time_budget,
ranking_score_threshold,
)?
} else {
let ranking_rules =
@@ -733,6 +738,7 @@ pub fn execute_search(
scoring_strategy,
placeholder_search_logger,
time_budget,
ranking_score_threshold,
)?
};

View File

@@ -17,6 +17,7 @@ pub struct Similar<'a> {
index: &'a Index,
embedder_name: String,
embedder: Arc<Embedder>,
ranking_score_threshold: Option<f64>,
}
impl<'a> Similar<'a> {
@@ -29,7 +30,17 @@ impl<'a> Similar<'a> {
embedder_name: String,
embedder: Arc<Embedder>,
) -> Self {
Self { id, filter: None, offset, limit, rtxn, index, embedder_name, embedder }
Self {
id,
filter: None,
offset,
limit,
rtxn,
index,
embedder_name,
embedder,
ranking_score_threshold: None,
}
}
pub fn filter(&mut self, filter: Filter<'a>) -> &mut Self {
@@ -37,8 +48,18 @@ impl<'a> Similar<'a> {
self
}
pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Self {
self.ranking_score_threshold = Some(ranking_score_threshold);
self
}
pub fn execute(&self) -> Result<SearchResult> {
let universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
let mut universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
// the target document must never appear among its own similar results
universe.remove(self.id);
let universe = universe;
let embedder_index =
self.index
@@ -77,6 +98,8 @@ impl<'a> Similar<'a> {
let mut documents_seen = RoaringBitmap::new();
documents_seen.insert(self.id);
let mut candidates = universe;
for (docid, distance) in results
.into_iter()
// skip documents we've already seen & mark that we saw the current document
@@ -85,8 +108,6 @@ impl<'a> Similar<'a> {
// take **after** the filter and the skip so that we get exactly `limit` elements if available
.take(self.limit)
{
documents_ids.push(docid);
let score = 1.0 - distance;
let score = self
.embedder
@@ -94,14 +115,28 @@ impl<'a> Similar<'a> {
.map(|distribution| distribution.shift(score))
.unwrap_or(score);
let score = ScoreDetails::Vector(score_details::Vector { similarity: Some(score) });
let score_details =
vec![ScoreDetails::Vector(score_details::Vector { similarity: Some(score) })];
document_scores.push(vec![score]);
let score = ScoreDetails::global_score(score_details.iter());
if let Some(ranking_score_threshold) = &self.ranking_score_threshold {
if score < *ranking_score_threshold {
// this document is no longer a candidate
candidates.remove(docid);
// any document after this one is no longer a candidate either, so restrict the set to documents already seen.
candidates &= documents_seen;
break;
}
}
documents_ids.push(docid);
document_scores.push(score_details);
}
Ok(SearchResult {
matching_words: Default::default(),
candidates: universe,
candidates,
documents_ids,
document_scores,
degraded: false,
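
Taken together, the `/similar` changes remove the target document from the universe up front, then stop accumulating hits as soon as a score dips below the threshold, shrinking `candidates` to match. A hypothetical wrapper showing the resulting invariant:

```rust
// Illustrative only: assumes a `Similar` builder constructed the usual way.
fn similar_with_threshold(similar: &mut Similar<'_>) -> Result<SearchResult> {
    // Cut the long tail of weakly similar documents.
    similar.ranking_score_threshold(0.2);
    let results = similar.execute()?;
    // Every returned hit now scores at least the threshold.
    debug_assert!(results
        .document_scores
        .iter()
        .all(|scores| ScoreDetails::global_score(scores.iter()) >= 0.2));
    Ok(results)
}
```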

View File

@@ -40,11 +40,26 @@ pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
operation: DelAddOperation,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
into_del_add_obkv_conditional_operation(reader, buffer, |_| operation)
}
/// Akin to the [into_del_add_obkv] function but lets you
/// conditionally define the `DelAdd` variant based on the obkv key.
pub fn into_del_add_obkv_conditional_operation<K, F>(
reader: obkv::KvReader<K>,
buffer: &mut Vec<u8>,
operation: F,
) -> std::io::Result<()>
where
K: obkv::Key + PartialOrd,
F: Fn(K) -> DelAddOperation,
{
let mut writer = obkv::KvWriter::new(buffer);
let mut value_buffer = Vec::new();
for (key, value) in reader.iter() {
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let operation = operation(key);
if matches!(operation, DelAddOperation::Deletion | DelAddOperation::DeletionAndAddition) {
value_writer.insert(DelAdd::Deletion, value)?;
}
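
A sketch of the intended call pattern, mirroring the transform call site later in this compare; `operations` maps field ids to the operation derived from the settings diff, and `write_conditional` is illustrative:

```rust
use std::collections::HashMap;
use std::hash::Hash;

// Per-field routing: fields absent from the map keep the default
// deletion-and-addition behavior.
fn write_conditional<K: obkv::Key + PartialOrd + Eq + Hash>(
    reader: obkv::KvReader<K>,
    buffer: &mut Vec<u8>,
    operations: &HashMap<K, DelAddOperation>,
) -> std::io::Result<()> {
    into_del_add_obkv_conditional_operation(reader, buffer, |field_id| {
        operations.get(&field_id).copied().unwrap_or(DelAddOperation::DeletionAndAddition)
    })
}
```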

View File

@@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryInto;
use std::fs::File;
use std::io::{self, BufReader};
@@ -9,7 +9,7 @@ use std::result::Result as StdResult;
use bytemuck::bytes_of;
use grenad::Sorter;
use heed::BytesEncode;
use itertools::EitherOrBoth;
use itertools::{merge_join_by, EitherOrBoth};
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use serde_json::{from_slice, Value};
@@ -18,7 +18,7 @@ use FilterableValues::{Empty, Null, Values};
use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters};
use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
@@ -75,149 +75,181 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut numbers_key_buffer = Vec::new();
let mut strings_key_buffer = Vec::new();
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
let old_faceted_fids: BTreeSet<_> =
settings_diff.old.faceted_fields_ids.iter().copied().collect();
let new_faceted_fids: BTreeSet<_> =
settings_diff.new.faceted_fields_ids.iter().copied().collect();
for (field_id, field_bytes) in obkv.iter() {
let delete_faceted = settings_diff.old.faceted_fields_ids.contains(&field_id);
let add_faceted = settings_diff.new.faceted_fields_ids.contains(&field_id);
if delete_faceted || add_faceted {
numbers_key_buffer.clear();
strings_key_buffer.clear();
if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids {
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
let get_document_json_value = move |field_id, side| {
obkv.get(field_id)
.map(KvReaderDelAdd::new)
.and_then(|kv| kv.get(side))
.map(from_slice)
.transpose()
.map_err(InternalError::SerdeJson)
};
// iterate over the faceted fields instead of over the whole document.
for eob in
merge_join_by(old_faceted_fids.iter(), new_faceted_fids.iter(), |old, new| {
old.cmp(new)
})
{
let (field_id, del_value, add_value) = match eob {
EitherOrBoth::Left(&field_id) => {
let del_value = get_document_json_value(field_id, DelAdd::Deletion)?;
// Set key to the field_id
// Note: this encoding is consistent with FieldIdCodec
numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes());
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
// deletion only
(field_id, del_value, None)
}
EitherOrBoth::Right(&field_id) => {
let add_value = get_document_json_value(field_id, DelAdd::Addition)?;
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = DocumentId::from_be_bytes(document);
// addition only
(field_id, None, add_value)
}
EitherOrBoth::Both(&field_id, _) => {
// during settings update, recompute the changing settings only.
if settings_diff.settings_update_only {
continue;
}
// For the other extraction tasks, prefix the key with the field_id and the document_id
numbers_key_buffer.extend_from_slice(docid_bytes);
strings_key_buffer.extend_from_slice(docid_bytes);
let del_value = get_document_json_value(field_id, DelAdd::Deletion)?;
let add_value = get_document_json_value(field_id, DelAdd::Addition)?;
let del_add_obkv = obkv::KvReader::new(field_bytes);
let del_value = match del_add_obkv.get(DelAdd::Deletion).filter(|_| delete_faceted)
{
Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
None => None,
};
let add_value = match del_add_obkv.get(DelAdd::Addition).filter(|_| add_faceted) {
Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
None => None,
(field_id, del_value, add_value)
}
};
// We insert the document id on the Del and the Add side if the field exists.
let (ref mut del_exists, ref mut add_exists) =
facet_exists_docids.entry(field_id).or_default();
let (ref mut del_is_null, ref mut add_is_null) =
facet_is_null_docids.entry(field_id).or_default();
let (ref mut del_is_empty, ref mut add_is_empty) =
facet_is_empty_docids.entry(field_id).or_default();
if del_value.is_some() || add_value.is_some() {
numbers_key_buffer.clear();
strings_key_buffer.clear();
if del_value.is_some() {
del_exists.insert(document);
}
if add_value.is_some() {
add_exists.insert(document);
}
// Set key to the field_id
// Note: this encoding is consistent with FieldIdCodec
numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes());
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
let del_geo_support = settings_diff
.old
.geo_fields_ids
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let add_geo_support = settings_diff
.new
.geo_fields_ids
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let del_filterable_values =
del_value.map(|value| extract_facet_values(&value, del_geo_support));
let add_filterable_values =
add_value.map(|value| extract_facet_values(&value, add_geo_support));
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = DocumentId::from_be_bytes(document);
// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers| {
insert_numbers_diff(
&mut fid_docid_facet_numbers_sorter,
&mut numbers_key_buffer,
del_numbers,
add_numbers,
)
};
let mut insert_strings_diff = |del_strings, add_strings| {
insert_strings_diff(
&mut fid_docid_facet_strings_sorter,
&mut strings_key_buffer,
del_strings,
add_strings,
)
};
// For the other extraction tasks, prefix the key with the field_id and the document_id
numbers_key_buffer.extend_from_slice(docid_bytes);
strings_key_buffer.extend_from_slice(docid_bytes);
match (del_filterable_values, add_filterable_values) {
(None, None) => (),
(Some(del_filterable_values), None) => match del_filterable_values {
Null => {
del_is_null.insert(document);
}
Empty => {
del_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
},
(None, Some(add_filterable_values)) => match add_filterable_values {
Null => {
add_is_null.insert(document);
}
Empty => {
add_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
}
},
(Some(del_filterable_values), Some(add_filterable_values)) => {
match (del_filterable_values, add_filterable_values) {
(Null, Null) | (Empty, Empty) => (),
(Null, Empty) => {
del_is_null.insert(document);
add_is_empty.insert(document);
}
(Empty, Null) => {
del_is_empty.insert(document);
add_is_null.insert(document);
}
(Null, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
// We insert the document id on the Del and the Add side if the field exists.
let (ref mut del_exists, ref mut add_exists) =
facet_exists_docids.entry(field_id).or_default();
let (ref mut del_is_null, ref mut add_is_null) =
facet_is_null_docids.entry(field_id).or_default();
let (ref mut del_is_empty, ref mut add_is_empty) =
facet_is_empty_docids.entry(field_id).or_default();
if del_value.is_some() {
del_exists.insert(document);
}
if add_value.is_some() {
add_exists.insert(document);
}
let del_geo_support = settings_diff
.old
.geo_fields_ids
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let add_geo_support = settings_diff
.new
.geo_fields_ids
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let del_filterable_values =
del_value.map(|value| extract_facet_values(&value, del_geo_support));
let add_filterable_values =
add_value.map(|value| extract_facet_values(&value, add_geo_support));
// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers| {
insert_numbers_diff(
&mut fid_docid_facet_numbers_sorter,
&mut numbers_key_buffer,
del_numbers,
add_numbers,
)
};
let mut insert_strings_diff = |del_strings, add_strings| {
insert_strings_diff(
&mut fid_docid_facet_strings_sorter,
&mut strings_key_buffer,
del_strings,
add_strings,
)
};
match (del_filterable_values, add_filterable_values) {
(None, None) => (),
(Some(del_filterable_values), None) => match del_filterable_values {
Null => {
del_is_null.insert(document);
}
(Empty, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
Empty => {
del_is_empty.insert(document);
}
(Values { numbers, strings }, Null) => {
add_is_null.insert(document);
Values { numbers, strings } => {
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(Values { numbers, strings }, Empty) => {
add_is_empty.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
},
(None, Some(add_filterable_values)) => match add_filterable_values {
Null => {
add_is_null.insert(document);
}
(
Values { numbers: del_numbers, strings: del_strings },
Values { numbers: add_numbers, strings: add_strings },
) => {
insert_numbers_diff(del_numbers, add_numbers)?;
insert_strings_diff(del_strings, add_strings)?;
Empty => {
add_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
}
},
(Some(del_filterable_values), Some(add_filterable_values)) => {
match (del_filterable_values, add_filterable_values) {
(Null, Null) | (Empty, Empty) => (),
(Null, Empty) => {
del_is_null.insert(document);
add_is_empty.insert(document);
}
(Empty, Null) => {
del_is_empty.insert(document);
add_is_null.insert(document);
}
(Null, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_null.insert(document);
}
(Empty, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_empty.insert(document);
}
(Values { numbers, strings }, Null) => {
add_is_null.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(Values { numbers, strings }, Empty) => {
add_is_empty.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(
Values { numbers: del_numbers, strings: del_strings },
Values { numbers: add_numbers, strings: add_strings },
) => {
insert_numbers_diff(del_numbers, add_numbers)?;
insert_strings_diff(del_strings, add_strings)?;
}
}
}
}
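
The rewrite pivots the loop from "every field of every document" to "only the union of old and new faceted fields", using `merge_join_by` to classify each field id as deletion-only, addition-only, or present on both sides. The classification itself is plain `itertools`; a self-contained example:

```rust
use itertools::{merge_join_by, EitherOrBoth};

fn main() {
    let old_faceted = [1u16, 2, 4]; // faceted before the settings change
    let new_faceted = [2u16, 3, 4]; // faceted after the settings change

    for eob in merge_join_by(old_faceted.iter(), new_faceted.iter(), |old, new| old.cmp(new)) {
        match eob {
            EitherOrBoth::Left(fid) => println!("field {fid}: delete facet data"),
            EitherOrBoth::Right(fid) => println!("field {fid}: add facet data"),
            EitherOrBoth::Both(fid, _) => println!("field {fid}: diff both sides"),
        }
    }
}
```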

View File

@@ -26,11 +26,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
// early return if the data shouldn't be deleted nor created.
if !any_deletion && !any_addition {
if settings_diff.settings_update_only && !settings_diff.reindex_proximities() {
let writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
@@ -39,8 +36,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
return writer_into_reader(writer);
}
let max_memory = indexer.max_memory_by_thread();
let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
let max_memory = indexer.max_memory_by_thread();
let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
.map(|_| {
create_sorter(

View File

@@ -8,6 +8,7 @@ mod extract_vector_points;
mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod extract_word_position_docids;
// mod searchable;
use std::fs::File;
use std::io::BufReader;

View File

@@ -0,0 +1,211 @@
use std::collections::HashMap;
use charabia::normalizer::NormalizedTokenIter;
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
use roaring::RoaringBitmap;
use serde_json::Value;
use crate::update::settings::InnerIndexSettings;
use crate::{InternalError, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>;
pub struct FieldWordPositionExtractorBuilder<'a> {
max_positions_per_attributes: u16,
stop_words: Option<&'a fst::Set<Vec<u8>>>,
separators: Option<Vec<&'a str>>,
dictionary: Option<Vec<&'a str>>,
}
impl<'a> FieldWordPositionExtractorBuilder<'a> {
pub fn new(
max_positions_per_attributes: Option<u32>,
settings: &'a InnerIndexSettings,
) -> Result<Self> {
let stop_words = settings.stop_words.as_ref();
let separators: Option<Vec<_>> =
settings.allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary: Option<Vec<_>> =
settings.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
Ok(Self {
max_positions_per_attributes: max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE as u16, |max| {
max.min(MAX_POSITION_PER_ATTRIBUTE) as u16
}),
stop_words,
separators,
dictionary,
})
}
pub fn build(&'a self) -> FieldWordPositionExtractor<'a> {
let builder = tokenizer_builder(
self.stop_words,
self.separators.as_deref(),
self.dictionary.as_deref(),
None,
);
FieldWordPositionExtractor {
tokenizer: builder.into_tokenizer(),
max_positions_per_attributes: self.max_positions_per_attributes,
}
}
}
pub struct FieldWordPositionExtractor<'a> {
tokenizer: Tokenizer<'a>,
max_positions_per_attributes: u16,
}
impl<'a> FieldWordPositionExtractor<'a> {
pub fn extract<'b>(
&'a self,
field_bytes: &[u8],
buffer: &'b mut String,
) -> Result<ExtractedFieldWordPosition<'a, 'b>> {
let field_value = serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
Ok(ExtractedFieldWordPosition {
tokenizer: &self.tokenizer,
max_positions_per_attributes: self.max_positions_per_attributes,
field_value,
buffer,
})
}
}
pub struct ExtractedFieldWordPosition<'a, 'b> {
tokenizer: &'a Tokenizer<'a>,
max_positions_per_attributes: u16,
field_value: Value,
buffer: &'b mut String,
}
impl<'a> ExtractedFieldWordPosition<'a, '_> {
pub fn iter<'o>(&'o mut self) -> FieldWordPositionIter<'o> {
self.buffer.clear();
let inner = match json_to_string(&self.field_value, &mut self.buffer) {
Some(field) => Some(self.tokenizer.tokenize(field)),
None => None,
};
// create an iterator of tokens with their positions.
FieldWordPositionIter {
inner,
max_positions_per_attributes: self.max_positions_per_attributes,
position: 0,
prev_kind: None,
}
}
}
pub struct FieldWordPositionIter<'a> {
inner: Option<NormalizedTokenIter<'a, 'a>>,
max_positions_per_attributes: u16,
position: u16,
prev_kind: Option<TokenKind>,
}
impl<'a> Iterator for FieldWordPositionIter<'a> {
type Item = (u16, Token<'a>);
fn next(&mut self) -> Option<Self::Item> {
if self.position >= self.max_positions_per_attributes {
return None;
}
let token = self.inner.as_mut().and_then(|i| i.next())?;
match token.kind {
TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
self.position += match self.prev_kind {
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
Some(_) => 1,
None => 0,
};
self.prev_kind = Some(token.kind)
}
TokenKind::Separator(_) if self.position == 0 => {
return self.next();
}
TokenKind::Separator(SeparatorKind::Hard) => {
self.prev_kind = Some(token.kind);
}
TokenKind::Separator(SeparatorKind::Soft)
if self.prev_kind != Some(TokenKind::Separator(SeparatorKind::Hard)) =>
{
self.prev_kind = Some(token.kind);
}
_ => return self.next(),
}
if !token.is_word() {
return self.next();
}
// keep a word only if it is not empty and fits in an LMDB key.
let lemma = token.lemma().trim();
if !lemma.is_empty() && lemma.len() <= MAX_WORD_LENGTH {
Some((self.position, token))
} else {
self.next()
}
}
}
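
The increment logic encodes word distance: consecutive words are one position apart, while a hard separator such as a period opens a gap of eight, which later penalizes proximity matches across sentence boundaries. A minimal restatement of the rule (`next_position` is illustrative):

```rust
use charabia::SeparatorKind;

/// Illustrative restatement of the position rule above: hard separators
/// open a gap of 8 positions, anything else separating two words a gap of 1.
fn next_position(position: u16, previous_separator: Option<SeparatorKind>) -> u16 {
    position
        + match previous_separator {
            Some(SeparatorKind::Hard) => 8, // sentence boundary: large gap
            Some(_) => 1,                   // soft separator: adjacent words
            None => 0,                      // first word of the attribute
        }
}
```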
/// Factorize tokenizer building.
pub fn tokenizer_builder<'a>(
stop_words: Option<&'a fst::Set<Vec<u8>>>,
allowed_separators: Option<&'a [&str]>,
dictionary: Option<&'a [&str]>,
script_language: Option<&'a HashMap<Script, Vec<Language>>>,
) -> TokenizerBuilder<'a, Vec<u8>> {
let mut tokenizer_builder = TokenizerBuilder::new();
if let Some(stop_words) = stop_words {
tokenizer_builder.stop_words(stop_words);
}
if let Some(dictionary) = dictionary {
tokenizer_builder.words_dict(dictionary);
}
if let Some(separators) = allowed_separators {
tokenizer_builder.separators(separators);
}
if let Some(script_language) = script_language {
tokenizer_builder.allow_list(script_language);
}
tokenizer_builder
}
/// Transform a JSON value into a string that can be indexed.
fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a str> {
fn inner(value: &Value, output: &mut String) -> bool {
use std::fmt::Write;
match value {
Value::Null | Value::Object(_) => false,
Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(),
Value::Number(number) => write!(output, "{}", number).is_ok(),
Value::String(string) => write!(output, "{}", string).is_ok(),
Value::Array(array) => {
let mut count = 0;
for value in array {
if inner(value, output) {
output.push_str(". ");
count += 1;
}
}
// check that at least one value was written
count != 0
}
}
}
if let Value::String(string) = value {
Some(string)
} else if inner(value, buffer) {
Some(buffer)
} else {
None
}
}
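
Concretely, scalars and arrays are flattened into one indexable string, each array element terminated by `". "`, while `null` and objects yield nothing. An illustrative test against the function above:

```rust
#[test]
fn json_to_string_flattens_values() {
    use serde_json::json;

    let mut buffer = String::new();
    // Arrays are flattened element by element, each followed by ". ".
    assert_eq!(json_to_string(&json!([1, "two", true]), &mut buffer), Some("1. two. true. "));

    let mut buffer = String::new();
    // `null` and objects are not indexable and yield nothing.
    assert_eq!(json_to_string(&json!({"a": 1}), &mut buffer), None);
}
```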

View File

@@ -0,0 +1,114 @@
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryInto;
use std::fs::File;
use std::io;
use std::io::BufReader;
use field_word_position::FieldWordPositionExtractorBuilder;
use obkv::KvReader;
use roaring::RoaringBitmap;
use word_docids::{WordDocidsDump, WordDocidsExtractor};
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::extract::extract_docid_word_positions::ScriptLanguageDocidsMap;
use crate::update::index_documents::GrenadParameters;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{FieldId, Result, SerializationError};
mod field_word_position;
mod word_docids;
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_searchable_data<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
max_positions_per_attributes: Option<u32>,
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
let searchable_fields_to_index = settings_diff.searchable_fields_to_index();
let mut documents_ids = RoaringBitmap::new();
let add_builder =
FieldWordPositionExtractorBuilder::new(max_positions_per_attributes, &settings_diff.new)?;
let add_token_positions_extractor = add_builder.build();
let del_builder;
let del_token_positions_extractor = if settings_diff.settings_update_only {
del_builder = FieldWordPositionExtractorBuilder::new(
max_positions_per_attributes,
&settings_diff.old,
)?;
del_builder.build()
} else {
add_builder.build()
};
let token_positions_extractor = &[del_token_positions_extractor, add_token_positions_extractor];
let mut word_map = BTreeMap::new();
let mut word_docids_extractor = WordDocidsExtractor::new(settings_diff);
let mut cursor = obkv_documents.into_cursor()?;
// loop over documents
while let Some((key, value)) = cursor.move_on_next()? {
let document_id = key
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value);
// if the searchable fields didn't change, skip the searchable indexing for this document.
if !settings_diff.reindex_searchable()
&& !searchable_fields_changed(&obkv, &searchable_fields_to_index)
{
continue;
}
documents_ids.push(document_id);
let mut buffer = String::new();
for field_id in searchable_fields_to_index.iter() {
let Some(field_obkv) = obkv.get(*field_id).map(KvReaderDelAdd::new) else { continue };
for (deladd, field_bytes) in field_obkv {
let mut extracted_positions =
token_positions_extractor[deladd as usize].extract(field_bytes, &mut buffer)?;
for (position, token) in extracted_positions.iter() {
let word = token.lemma().trim();
if !word_map.contains_key(word) {
word_map.insert(word.to_string(), word_map.len() as u32);
}
let word_id = word_map.get(word).unwrap();
word_docids_extractor.insert(*word_id, *field_id, document_id, deladd);
}
}
}
if word_docids_extractor.rough_size_estimate()
> indexer.max_memory.map_or(512 * 1024 * 1024, |s| s.min(512 * 1024 * 1024))
{
let WordDocidsDump { .. } =
word_docids_extractor.dump(&word_map, &searchable_fields_to_index, indexer)?;
}
}
todo!()
}
/// Check if any searchable fields of a document changed.
fn searchable_fields_changed(
obkv: &KvReader<FieldId>,
searchable_fields: &BTreeSet<FieldId>,
) -> bool {
for field_id in searchable_fields {
let Some(field_obkv) = obkv.get(*field_id).map(KvReaderDelAdd::new) else { continue };
match (field_obkv.get(DelAdd::Deletion), field_obkv.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
(None, None) => (),
// if both contains a value and values are the same, check the next field.
(Some(del), Some(add)) if del == add => (),
// otherwise the fields are different, return true.
_otherwise => return true,
}
}
false
}

View File

@@ -0,0 +1,203 @@
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::hash::Hash;
use std::io::BufReader;
use std::mem::size_of;
use roaring::RoaringBitmap;
use crate::update::del_add::KvWriterDelAdd;
use crate::update::index_documents::extract::searchable::DelAdd;
use crate::update::index_documents::{create_writer, writer_into_reader, GrenadParameters};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result};
pub struct WordDocidsExtractor<'a> {
word_fid_docids: RevertedIndex<(u32, FieldId)>,
settings_diff: &'a InnerIndexSettingsDiff,
}
impl<'a> WordDocidsExtractor<'a> {
pub fn new(settings_diff: &'a InnerIndexSettingsDiff) -> Self {
Self { word_fid_docids: RevertedIndex::new(), settings_diff }
}
pub fn insert(&mut self, wordid: u32, fieldid: FieldId, docid: DocumentId, del_add: DelAdd) {
self.word_fid_docids.insert((wordid, fieldid), docid, del_add);
}
pub fn rough_size_estimate(&self) -> usize {
self.word_fid_docids.rough_size_estimate()
}
pub fn dump(
&mut self,
word_map: &BTreeMap<String, u32>,
fields: &BTreeSet<FieldId>,
indexer: GrenadParameters,
) -> Result<WordDocidsDump> {
let mut word_fid_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut word_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut exact_word_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut exact_word_deletion = RoaringBitmap::new();
let mut exact_word_addition = RoaringBitmap::new();
let mut word_deletion = RoaringBitmap::new();
let mut word_addition = RoaringBitmap::new();
let mut key_buffer = Vec::new();
let mut bitmap_buffer = Vec::new();
let mut obkv_buffer = Vec::new();
for (word, wid) in word_map {
exact_word_deletion.clear();
exact_word_addition.clear();
word_deletion.clear();
word_addition.clear();
for fid in fields {
if let Some((deletion, addition)) = self.word_fid_docids.inner.get(&(*wid, *fid)) {
if self.settings_diff.old.exact_attributes.contains(fid) {
exact_word_deletion |= deletion;
} else {
word_deletion |= deletion;
}
if self.settings_diff.new.exact_attributes.contains(fid) {
exact_word_addition |= addition;
} else {
word_addition |= addition;
}
if deletion != addition {
key_buffer.clear();
key_buffer.extend_from_slice(word.as_bytes());
key_buffer.push(0);
key_buffer.extend_from_slice(&fid.to_be_bytes());
let value = bitmaps_into_deladd_obkv(
deletion,
addition,
&mut obkv_buffer,
&mut bitmap_buffer,
)?;
word_fid_docids_writer.insert(&key_buffer, value)?;
}
}
}
key_buffer.clear();
key_buffer.extend_from_slice(word.as_bytes());
if exact_word_deletion != exact_word_addition {
let value = bitmaps_into_deladd_obkv(
&exact_word_deletion,
&exact_word_addition,
&mut obkv_buffer,
&mut bitmap_buffer,
)?;
exact_word_docids_writer.insert(&key_buffer, value)?;
}
if word_deletion != word_addition {
let value = bitmaps_into_deladd_obkv(
&word_deletion,
&word_addition,
&mut obkv_buffer,
&mut bitmap_buffer,
)?;
word_docids_writer.insert(&key_buffer, value)?;
}
}
self.word_fid_docids.clear();
Ok(WordDocidsDump {
word_fid_docids: writer_into_reader(word_fid_docids_writer)?,
word_docids: writer_into_reader(word_docids_writer)?,
exact_word_docids: writer_into_reader(exact_word_docids_writer)?,
})
}
}
fn bitmaps_into_deladd_obkv<'a>(
deletion: &RoaringBitmap,
addition: &RoaringBitmap,
obkv_buffer: &'a mut Vec<u8>,
bitmap_buffer: &mut Vec<u8>,
) -> Result<&'a mut Vec<u8>> {
obkv_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(obkv_buffer);
if !deletion.is_empty() {
bitmap_buffer.clear();
CboRoaringBitmapCodec::serialize_into(deletion, bitmap_buffer);
value_writer.insert(DelAdd::Deletion, &*bitmap_buffer)?;
}
if !addition.is_empty() {
bitmap_buffer.clear();
CboRoaringBitmapCodec::serialize_into(addition, bitmap_buffer);
value_writer.insert(DelAdd::Addition, &*bitmap_buffer)?;
}
Ok(value_writer.into_inner()?)
}
#[derive(Debug)]
struct RevertedIndex<K> {
inner: HashMap<K, (RoaringBitmap, RoaringBitmap)>,
max_value_size: usize,
}
impl<K: PartialEq + Eq + Hash> RevertedIndex<K> {
pub fn insert(&mut self, key: K, docid: DocumentId, del_add: DelAdd) {
let size = match self.inner.entry(key) {
Occupied(mut entry) => {
let (ref mut del, ref mut add) = entry.get_mut();
match del_add {
DelAdd::Deletion => del.insert(docid),
DelAdd::Addition => add.insert(docid),
};
del.serialized_size() + add.serialized_size()
}
Vacant(entry) => {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(docid);
let size = bitmap.serialized_size();
match del_add {
DelAdd::Deletion => entry.insert((bitmap, RoaringBitmap::new())),
DelAdd::Addition => entry.insert((RoaringBitmap::new(), bitmap)),
};
size * 2
}
};
self.max_value_size = self.max_value_size.max(size);
}
pub fn new() -> Self {
Self { inner: HashMap::new(), max_value_size: 0 }
}
pub fn rough_size_estimate(&self) -> usize {
self.inner.len() * size_of::<K>() + self.inner.len() * self.max_value_size
}
fn clear(&mut self) {
self.max_value_size = 0;
self.inner.clear();
}
}
pub struct WordDocidsDump {
pub word_fid_docids: grenad::Reader<BufReader<File>>,
pub word_docids: grenad::Reader<BufReader<File>>,
pub exact_word_docids: grenad::Reader<BufReader<File>>,
}
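
End to end, the extractor accumulates `(word_id, field_id)` to `(deletion, addition)` bitmaps in memory and flushes them into sorted grenad files whenever the rough size estimate crosses the memory budget. An illustrative driver, assuming the WIP API above (all bindings are placeholders):

```rust
// Illustrative only: the WIP pipeline above wires this up differently.
fn drive_word_docids(
    settings_diff: &InnerIndexSettingsDiff,
    extracted_words: Vec<(u32, FieldId, DocumentId)>,
    memory_budget: usize,
    word_map: &BTreeMap<String, u32>,
    fields: &BTreeSet<FieldId>,
    indexer: GrenadParameters,
) -> Result<()> {
    let mut extractor = WordDocidsExtractor::new(settings_diff);
    for (word_id, field_id, docid) in extracted_words {
        extractor.insert(word_id, field_id, docid, DelAdd::Addition);
        if extractor.rough_size_estimate() > memory_budget {
            // `dump` flushes the in-memory index into sorted grenad files
            // and clears it so extraction can continue within budget.
            let WordDocidsDump { word_docids, word_fid_docids, exact_word_docids } =
                extractor.dump(word_map, fields, indexer)?;
            // …hand the readers over to the LMDB writer thread.
            let _ = (word_docids, word_fid_docids, exact_word_docids);
        }
    }
    Ok(())
}
```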

View File

@@ -369,6 +369,7 @@ where
// Run extraction pipeline in parallel.
pool.install(|| {
let settings_diff_cloned = settings_diff.clone();
rayon::spawn(move || {
let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
let _enter = child_span.enter();
@@ -398,7 +399,7 @@ where
pool_params,
lmdb_writer_sx.clone(),
primary_key_id,
settings_diff.clone(),
settings_diff_cloned,
max_positions_per_attributes,
)
});
@@ -425,7 +426,7 @@ where
Err(status) => {
if let Some(typed_chunks) = chunk_accumulator.pop_longest() {
let (docids, is_merged_database) =
write_typed_chunk_into_index(typed_chunks, self.index, self.wtxn)?;
write_typed_chunk_into_index(self.wtxn, self.index, &settings_diff, typed_chunks)?;
if !docids.is_empty() {
final_documents_ids |= docids;
let documents_seen_count = final_documents_ids.len();

View File

@@ -20,7 +20,10 @@ use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::index::{db_name, main_key};
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
use crate::update::del_add::{
into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation,
KvReaderDelAdd,
};
use crate::update::index_documents::GrenadParameters;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
@@ -805,13 +808,15 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut new_inner_settings = old_inner_settings.clone();
new_inner_settings.fields_ids_map = fields_ids_map;
let settings_diff = InnerIndexSettingsDiff {
old: old_inner_settings,
new: new_inner_settings,
let embedding_configs_updated = false;
let settings_update_only = false;
let settings_diff = InnerIndexSettingsDiff::new(
old_inner_settings,
new_inner_settings,
primary_key_id,
embedding_configs_updated: false,
settings_update_only: false,
};
embedding_configs_updated,
settings_update_only,
);
Ok(TransformOutput {
primary_key,
@@ -840,14 +845,6 @@ impl<'a, 'i> Transform<'a, 'i> {
// Always keep the primary key.
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
// If only the `searchableAttributes` has been changed, keep only the searchable fields.
let must_reindex_searchables = settings_diff.reindex_searchable();
let necessary_searchable_field = |id: FieldId| -> bool {
must_reindex_searchables
&& (settings_diff.old.searchable_fields_ids.contains(&id)
|| settings_diff.new.searchable_fields_ids.contains(&id))
};
// If only a faceted field has been added, keep only this field.
let must_reindex_facets = settings_diff.reindex_facets();
let necessary_faceted_field = |id: FieldId| -> bool {
@@ -862,13 +859,16 @@ impl<'a, 'i> Transform<'a, 'i> {
// we need the fields for the prompt/templating.
let reindex_vectors = settings_diff.reindex_vectors();
// The operations that we must perform on the different fields.
let mut operations = HashMap::new();
let mut obkv_writer = KvWriter::<_, FieldId>::memory();
for (id, val) in old_obkv.iter() {
if is_primary_key(id)
|| necessary_searchable_field(id)
|| necessary_faceted_field(id)
|| reindex_vectors
{
if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors {
operations.insert(id, DelAddOperation::DeletionAndAddition);
obkv_writer.insert(id, val)?;
} else if let Some(operation) = settings_diff.reindex_searchable_id(id) {
operations.insert(id, operation);
obkv_writer.insert(id, val)?;
}
}
@@ -887,11 +887,9 @@ impl<'a, 'i> Transform<'a, 'i> {
let flattened = flattened.as_deref().map_or(obkv, KvReader::new);
flattened_obkv_buffer.clear();
into_del_add_obkv(
flattened,
DelAddOperation::DeletionAndAddition,
flattened_obkv_buffer,
)?;
into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| {
operations.get(&id).copied().unwrap_or(DelAddOperation::DeletionAndAddition)
})?;
}
Ok(())
@@ -901,6 +899,11 @@ impl<'a, 'i> Transform<'a, 'i> {
/// of the index with the attributes reordered accordingly to the `FieldsIdsMap` given as argument.
///
// TODO this can be done in parallel by using the rayon `ThreadPool`.
#[tracing::instrument(
level = "trace",
skip(self, wtxn, settings_diff),
target = "indexing::documents"
)]
pub fn prepare_for_documents_reindexing(
self,
wtxn: &mut heed::RwTxn<'i>,

View File

@@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec;
use charabia::{Language, Script};
use grenad::{Merger, MergerBuilder};
use heed::types::Bytes;
-use heed::RwTxn;
+use heed::{BytesDecode, RwTxn};
use obkv::{KvReader, KvWriter};
use roaring::RoaringBitmap;
@@ -20,13 +20,16 @@ use super::MergeFn;
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
use crate::facet::FacetType;
use crate::index::db_name::DOCUMENTS;
use crate::proximity::MAX_DISTANCE;
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::{
as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
};
use crate::update::settings::InnerIndexSettingsDiff;
 use crate::{
-    lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, InternalError, Result, SerializationError,
+    lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
+    Result, SerializationError, U8StrStrCodec,
 };
/// This struct accumulates and groups the TypedChunks
@@ -122,9 +125,10 @@ impl TypedChunk {
/// Return new documents seen.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
 pub(crate) fn write_typed_chunk_into_index(
-    typed_chunks: Vec<TypedChunk>,
-    index: &Index,
     wtxn: &mut RwTxn,
+    index: &Index,
+    settings_diff: &InnerIndexSettingsDiff,
+    typed_chunks: Vec<TypedChunk>,
 ) -> Result<(RoaringBitmap, bool)> {
let mut is_merged_database = false;
match typed_chunks[0] {
@@ -485,13 +489,22 @@ pub(crate) fn write_typed_chunk_into_index(
}
let merger = builder.build();
-            write_entries_into_database(
-                merger,
-                &index.word_pair_proximity_docids,
-                wtxn,
-                deladd_serialize_add_side,
-                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
-            )?;
+            if settings_diff.only_additional_fields.is_some() {
+                write_proximity_entries_into_database_additional_searchables(
+                    merger,
+                    &index.word_pair_proximity_docids,
+                    wtxn,
+                )?;
+            } else {
+                write_entries_into_database(
+                    merger,
+                    &index.word_pair_proximity_docids,
+                    wtxn,
+                    deladd_serialize_add_side,
+                    merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+                )?;
+            }
is_merged_database = true;
}
TypedChunk::FieldIdDocidFacetNumbers(_) => {
@@ -830,3 +843,51 @@ where
}
Ok(())
}
/// Akin to the `write_entries_into_database` function but specialized
/// for the case when we only index additional searchable fields.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn write_proximity_entries_into_database_additional_searchables<R>(
    merger: Merger<R, MergeFn>,
    database: &heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
    wtxn: &mut RwTxn,
) -> Result<()>
where
    R: io::Read + io::Seek,
{
    let mut iter = merger.into_stream_merger_iter()?;
    while let Some((key, value)) = iter.next()? {
        if valid_lmdb_key(key) {
            let (proximity_to_insert, word1, word2) =
                U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
            let data_to_insert = match KvReaderDelAdd::new(value).get(DelAdd::Addition) {
                Some(value) => {
                    CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
                }
                None => continue,
            };

            let mut data_to_remove = RoaringBitmap::new();
            for prox in 1..(MAX_DISTANCE as u8) {
                let key = (prox, word1, word2);
                let database_value = database.get(wtxn, &key)?.unwrap_or_default();
                let value = if prox == proximity_to_insert {
                    // Proximity that should be changed:
                    // union the new values and remove the lower-proximity data.
                    (&database_value | &data_to_insert) - &data_to_remove
                } else {
                    // Remove the lower-proximity data.
                    &database_value - &data_to_remove
                };

                // Add the current data to `data_to_remove` for the next proximities.
                data_to_remove |= &value;

                if database_value != value {
                    database.put(wtxn, &key, &value)?;
                }
            }
        }
    }
    Ok(())
}
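
To make the cascade concrete, here is a toy, in-memory version of the inner loop, with a `Vec` standing in for the LMDB database, `MAX_DISTANCE` hardcoded to 8, and made-up docids; the put-if-changed optimization is omitted:

```rust
use roaring::RoaringBitmap;

fn main() {
    // The merger says the pair now appears at proximity 2 for documents 10 and 11.
    let proximity_to_insert = 2u8;
    let data_to_insert: RoaringBitmap = [10, 11].into_iter().collect();

    // Pretend database state for one (word1, word2) pair, indexed by proximity.
    let mut db: Vec<RoaringBitmap> = vec![RoaringBitmap::new(); 8];
    db[3] = [10, 11, 12].into_iter().collect();

    let mut data_to_remove = RoaringBitmap::new();
    for prox in 1..8u8 {
        let database_value = db[prox as usize].clone();
        let value = if prox == proximity_to_insert {
            // Union the new docids, minus anything already kept at a lower proximity.
            (&database_value | &data_to_insert) - &data_to_remove
        } else {
            // Strip docids that a lower proximity now owns.
            &database_value - &data_to_remove
        };
        data_to_remove |= &value;
        db[prox as usize] = value;
    }

    // 10 and 11 moved down to proximity 2; 12 is untouched at proximity 3.
    assert!(db[2].contains(10) && db[2].contains(11));
    assert_eq!(db[3].iter().collect::<Vec<u32>>(), vec![12]);
}
```

Because every proximity already kept is folded into `data_to_remove`, each docid ends up stored at exactly one (the lowest) proximity for a given word pair.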

View File

@@ -9,6 +9,7 @@ use itertools::{EitherOrBoth, Itertools};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use time::OffsetDateTime;
use super::del_add::DelAddOperation;
use super::index_documents::{IndexDocumentsConfig, Transform};
use super::IndexerConfig;
use crate::criterion::Criterion;
@@ -1072,13 +1073,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
.index
.primary_key(self.wtxn)?
.and_then(|name| new_inner_settings.fields_ids_map.id(name));
-        let inner_settings_diff = InnerIndexSettingsDiff {
-            old: old_inner_settings,
-            new: new_inner_settings,
+        let settings_update_only = true;
+        let inner_settings_diff = InnerIndexSettingsDiff::new(
+            old_inner_settings,
+            new_inner_settings,
             primary_key_id,
             embedding_configs_updated,
-            settings_update_only: true,
-        };
+            settings_update_only,
+        );
if inner_settings_diff.any_reindexing_needed() {
self.reindex(&progress_callback, &should_abort, inner_settings_diff)?;
@@ -1095,21 +1097,116 @@ pub struct InnerIndexSettingsDiff {
// TODO: compare directly the embedders.
pub(crate) embedding_configs_updated: bool,
pub(crate) settings_update_only: bool,
    /// The set of only the additional searchable fields.
    /// If any other searchable field has been modified, this is set to `None`.
    pub(crate) only_additional_fields: Option<HashSet<String>>,

    // Cache the check to see if any of the stop_words, allowed_separators,
    // dictionary, or proximity_precision settings changed.
    pub(crate) cache_reindex_searchable_without_user_defined: bool,
    // Cache the check to see if the user_defined_searchables changed.
    pub(crate) cache_user_defined_searchables: bool,
    // Cache the check to see if the exact_attributes changed.
    pub(crate) cache_exact_attributes: bool,
}
impl InnerIndexSettingsDiff {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::settings")]
pub(crate) fn new(
old_settings: InnerIndexSettings,
new_settings: InnerIndexSettings,
primary_key_id: Option<FieldId>,
embedding_configs_updated: bool,
settings_update_only: bool,
) -> Self {
let only_additional_fields = match (
&old_settings.user_defined_searchable_fields,
&new_settings.user_defined_searchable_fields,
) {
(None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
(Some(old), Some(new)) => {
let old: HashSet<_> = old.iter().cloned().collect();
let new: HashSet<_> = new.iter().cloned().collect();
if old.difference(&new).next().is_none() {
// if no field has been removed, return only the additional ones
Some(&new - &old).filter(|x| !x.is_empty())
} else {
None
}
}
};
let cache_reindex_searchable_without_user_defined = {
old_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
!= new_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
|| old_settings.allowed_separators != new_settings.allowed_separators
|| old_settings.dictionary != new_settings.dictionary
|| old_settings.proximity_precision != new_settings.proximity_precision
};
let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
!= new_settings.user_defined_searchable_fields;
InnerIndexSettingsDiff {
old: old_settings,
new: new_settings,
primary_key_id,
embedding_configs_updated,
settings_update_only,
only_additional_fields,
cache_reindex_searchable_without_user_defined,
cache_user_defined_searchables,
cache_exact_attributes,
}
}
pub fn searchable_fields_to_index(&self) -> BTreeSet<FieldId> {
if self.settings_update_only {
self.new
.fields_ids_map
.ids()
.filter(|id| self.reindex_searchable_id(*id).is_some())
.collect()
} else {
self.new.searchable_fields_ids.iter().copied().collect()
}
}
pub fn any_reindexing_needed(&self) -> bool {
self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors()
}
     pub fn reindex_searchable(&self) -> bool {
-        self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
-            != self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
-            || self.old.allowed_separators != self.new.allowed_separators
-            || self.old.dictionary != self.new.dictionary
-            || self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields
-            || self.old.exact_attributes != self.new.exact_attributes
-            || self.old.proximity_precision != self.new.proximity_precision
+        self.cache_reindex_searchable_without_user_defined
+            || self.cache_exact_attributes
+            || self.cache_user_defined_searchables
     }
    pub fn reindex_proximities(&self) -> bool {
        // if any searchable settings force reindexing
        (self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables)
            // and if any setting needs the proximity database to be created
            && (self.old.proximity_precision == ProximityPrecision::ByAttribute
                || self.new.proximity_precision == ProximityPrecision::ByAttribute)
    }
pub fn reindex_searchable_id(&self, id: FieldId) -> Option<DelAddOperation> {
if self.cache_reindex_searchable_without_user_defined || self.cache_exact_attributes {
Some(DelAddOperation::DeletionAndAddition)
} else if let Some(only_additional_fields) = &self.only_additional_fields {
let additional_field = self.new.fields_ids_map.name(id).unwrap();
if only_additional_fields.contains(additional_field) {
Some(DelAddOperation::Addition)
} else {
None
}
} else if self.cache_user_defined_searchables {
Some(DelAddOperation::DeletionAndAddition)
} else {
None
}
}
pub fn reindex_facets(&self) -> bool {
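
The `only_additional_fields` rule computed in `InnerIndexSettingsDiff::new` above, and consumed by `reindex_searchable_id`, is plain set arithmetic; here is a standalone sketch of it (the free function and its name are ours, not milli's):

```rust
use std::collections::HashSet;

// Standalone version of the `only_additional_fields` computation.
fn only_additional_fields(old: &[&str], new: &[&str]) -> Option<HashSet<String>> {
    let old: HashSet<String> = old.iter().map(|s| s.to_string()).collect();
    let new: HashSet<String> = new.iter().map(|s| s.to_string()).collect();
    if old.difference(&new).next().is_none() {
        // No field was removed: keep only the newly added ones, if any.
        Some(&new - &old).filter(|x| !x.is_empty())
    } else {
        None
    }
}

fn main() {
    // Purely additive change: only `tags` needs indexing, Addition side only.
    let added = only_additional_fields(&["title"], &["title", "tags"]);
    assert_eq!(added, Some(HashSet::from(["tags".to_string()])));

    // A field was removed: no fast path, full searchable reindex.
    assert_eq!(only_additional_fields(&["title", "desc"], &["title"]), None);

    // No change at all: nothing additional either.
    assert_eq!(only_additional_fields(&["title"], &["title"]), None);
}
```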
@@ -1580,7 +1677,7 @@ mod tests {
// When we search for something that is not in
// the searchable fields it must not return any document.
let result = index.search(&rtxn).query("23").execute().unwrap();
-        assert!(result.documents_ids.is_empty());
+        assert_eq!(result.documents_ids, Vec::<u32>::new());
// When we search for something that is in the searchable fields
// we must find the appropriate document.