Compare commits

...

72 Commits

Author SHA1 Message Date
9874efc352 WIP 2024-07-04 11:18:45 +02:00
a838f39fce Merge #4682
4682: Speed Up Filter ANDs operations r=Kerollmops a=Kerollmops

This PR fixes #4659 and improves the way we do AND operations by using the latest [RoaringBitmap feature to do intersections with serialized bitmaps](https://github.com/RoaringBitmap/roaring-rs/pull/281). Doing so drastically reduces the time spent reading and copying bytes in memory when only a subset of the bitmap's containers is needed.
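
For illustration, here is a minimal Rust sketch of the idea. Only the `deserialize_from` path is the long-standing roaring API; the name and signature of the intersection-on-deserialization method added in roaring-rs#281 are assumed here, not taken from this PR.

```rust
use roaring::RoaringBitmap;

// Classic path: fully deserialize the stored bitmap, then intersect it with the universe.
fn and_eager(universe: &RoaringBitmap, stored: &[u8]) -> std::io::Result<RoaringBitmap> {
    let candidates = RoaringBitmap::deserialize_from(stored)?;
    Ok(universe & &candidates)
}

// New path (method name and signature assumed from roaring-rs#281): intersect while
// deserializing, so containers that share nothing with the universe are never materialized.
fn and_lazy(universe: &RoaringBitmap, stored: &[u8]) -> std::io::Result<RoaringBitmap> {
    universe.intersection_with_serialized_unchecked(stored)
}
```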

### Some Example Results

On a 45M-document dataset running on a good NVMe drive, this example filter used to take 77ms and now takes only 13ms with this PR (a 6x speedup):

```sql
artist = 'The Beatles' AND (duration 150 TO 500 OR duration NOT EXISTS) AND genres IN [Rock, 'Rock and Roll'] AND rating > 4 AND released_year 1960 TO 1990
```

By reordering the filter AND clauses we can reach a constant 8ms execution time; note, however, that this reordering is a manual operation. By contrast, the previous filter pipeline stays at a constant 45ms with the same reordered filter (again roughly a 6x speedup):

```sql
artist = 'The Beatles' AND genres IN [Rock, 'Rock and Roll'] AND released_year 1960 TO 1990 AND (duration 150 TO 500 OR duration NOT EXISTS)
```

### To Do
- [x] Rebase on `release-v1.9.0`.
- [ ] ~Skip branches of the facet/filter tree when nothing is in common with the universe~ slower this way.
- [x] When the universe is required, use the universe given as a parameter if possible.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-06-11 02:51:17 +00:00
7add7d053c Merge #4689
4689: Bring back changes from v1.8.2 into v1.9.0 r=curquiza a=dureuill



Co-authored-by: dureuill <dureuill@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
2024-06-10 14:03:55 +00:00
7559dfc814 Merge tag 'v1.8.2' into release-v1.9.0 2024-06-10 15:07:34 +02:00
6c6c4732a1 Merge #4681
4681: Fix concurrency issue r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4654 

## What does this PR do?
- Asynchronously drop permits
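
For context, the idea is to stop blocking inside `Drop` when handing the permit back to the search queue: the send is moved onto a spawned task instead of blocking the current runtime thread. A rough sketch of the pattern (types simplified; the real `Permit` lives in Meilisearch's search queue and the actual change appears in the diff below):

```rust
use tokio::sync::mpsc::Sender;

struct Permit {
    sender: Sender<()>,
}

impl Drop for Permit {
    fn drop(&mut self) {
        // Clone so the send can outlive `self`, then hand the permit back on a spawned
        // task instead of blocking the current runtime thread (assumes a Tokio runtime
        // is running, as it is in Meilisearch's HTTP server).
        let sender = self.sender.clone();
        let _ = tokio::spawn(async move {
            // If the receiver is gone, the whole search queue is shutting down anyway.
            let _ = sender.send(()).await;
        });
    }
}
```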


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-10 09:36:08 +00:00
3976fe660e Merge #4688
4688: Update version for the next release (v1.8.2) in Cargo.toml r=dureuill a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2024-06-10 08:28:34 +00:00
50f8218a5d Asynchronously drop permits 2024-06-10 10:19:57 +02:00
19585f1a4f Update version for the next release (v1.8.2) in Cargo.toml 2024-06-10 07:59:36 +00:00
8ec6e175e5 Replace roaring patch to the v0.10.5 2024-06-07 22:11:26 -04:00
75b2e02cd2 Log more stuff around filtering 2024-06-06 11:00:07 -04:00
40f05fe156 Bump roaring to the latest commit 2024-06-06 10:59:55 -04:00
52d0d35b39 Revert "Reduce the universe while exploring the facet tree" because it's slower this way
This reverts commit 14026115f21409535772ede0ee4273f37848dd61.
2024-06-06 09:17:51 -04:00
5432776132 Reduce the universe while exploring the facet tree 2024-06-06 09:17:51 -04:00
66470b27e6 Use the MultiOps trait for IN operations 2024-06-06 09:17:51 -04:00
0a9bd398c7 Improve the NOT operator to use the universe when possible 2024-06-06 09:17:51 -04:00
7967e93c16 Skip evaluating when a universe is empty, nothing can be found 2024-06-06 09:17:51 -04:00
a6f3a01c6a Expose the universe to do efficient intersections on deserialization 2024-06-06 09:17:51 -04:00
4ca4a3f954 Make the CboRoaringBitmapCodec support intersection on deserialization 2024-06-06 09:17:51 -04:00
e4a69c5ac3 Introduce the FacetGroupLazyValue type 2024-06-06 09:17:50 -04:00
ff2e498267 Patch roaring to use the version supporting intersection on deserialization 2024-06-06 09:17:50 -04:00
531e3d7d6a MultiOps trait for OR operations 2024-06-06 09:17:50 -04:00
cb765ad249 Merge #4684
4684: Update Charabia v0.8.11 r=irevoire a=ManyTheFish

# Update Charabia v0.8.11

### Adds a new normalizer to normalize œ to oe and æ to ae
Search words containing `œ` or `æ` can now be retrieved using `oe` or `ae`, e.g. `Daemon` <=> `Dæmon`.
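
As a quick illustration, assuming Charabia's `Tokenize` trait as shown in its README (the expected output is an inference, not taken from this PR), both spellings should now share the same lemma:

```rust
use charabia::Tokenize;

fn main() {
    // With the new normalizer, "Dæmon" and "Daemon" are expected to normalize to the same lemma.
    for text in ["Dæmon", "Daemon"] {
        let token = text.tokenize().next().unwrap();
        println!("{text} -> {}", token.lemma());
    }
}
```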

### Fix: make `chinese-normalization-pinyin` feature flag compile
Fixes #4629



Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-06-06 08:59:49 +00:00
2e50c6ec81 Update Charabia 2024-06-06 10:18:43 +02:00
40b2345394 Merge #4680
4680: Speedup additional searchables r=Kerollmops a=ManyTheFish

Fixes #4492.

## To Do
 - [x] Do not call the `InnerSettingsDiff::only_additional_fields` function too many times
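
One way to address that item is to memoize the result so the diff is computed at most once. A hypothetical sketch (field names and types are illustrative, not milli's actual `InnerSettingsDiff`):

```rust
use std::collections::BTreeSet;
use std::sync::OnceLock;

struct InnerSettingsDiff {
    old_searchable: BTreeSet<String>,
    new_searchable: BTreeSet<String>,
    // Cache so the (potentially expensive) diff is computed at most once.
    only_additional_fields: OnceLock<Option<BTreeSet<String>>>,
}

impl InnerSettingsDiff {
    /// Returns the searchable fields that were only *added* by the settings change,
    /// or `None` when fields were also removed (which forces a full reindex).
    fn only_additional_fields(&self) -> &Option<BTreeSet<String>> {
        self.only_additional_fields.get_or_init(|| {
            let removed: BTreeSet<_> =
                self.old_searchable.difference(&self.new_searchable).cloned().collect();
            if removed.is_empty() {
                Some(self.new_searchable.difference(&self.old_searchable).cloned().collect())
            } else {
                None
            }
        })
    }
}
```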

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-06-05 15:39:28 +00:00
30293883e0 Fix condition mistake 2024-06-05 17:30:07 +02:00
b833be46b9 Avoid running proximity when only the exact attributes changes 2024-06-05 17:30:07 +02:00
0a4118329e Put only_additional_fields to None if the difference gives an empty result. 2024-06-05 17:30:07 +02:00
261e92d7e6 Skip iterating over documents when the faceted field list doesn't change 2024-06-05 17:30:07 +02:00
5cd08979b1 iterate over the faceted fields instead of over the whole document 2024-06-05 17:30:07 +02:00
2af7e4dbe9 Rename the embeddings workloads 2024-06-05 17:30:07 +02:00
a998b881f6 Cache a lot of operations to know if a field must be indexed 2024-06-05 17:30:07 +02:00
b81953a65d Add a span for the prepare_for_documents_reindexing 2024-06-05 17:30:07 +02:00
091bb157f1 Add a span for the settings diff creation 2024-06-05 17:30:07 +02:00
1b639ce44b Reduce the number of complex calls to settings diff functions 2024-06-05 17:30:07 +02:00
87cf8a3c94 Introduce a new way to determine the operations to perform on the fields 2024-06-05 17:30:07 +02:00
0f578348f1 Introduce a dedicated function to write proximity entries in database 2024-06-05 17:30:07 +02:00
fad4675abe Give the settings diff to the write_typed_chunk_into_index function 2024-06-05 17:30:07 +02:00
1ab03c4ede Fix an issue with settings diff and * in the searchable attributes 2024-06-05 17:30:07 +02:00
0c6e4b2f00 Introducing a new into_del_add_obkv_conditional_operation function 2024-06-05 17:30:07 +02:00
42b3f52ef9 Introduce the SettingDiff only_additional_fields method 2024-06-05 17:30:07 +02:00
98e062a714 Merge #4675
4675: Update actix-web 4.5.1 -> 4.6.0 r=dureuill a=dureuill

# Pull Request

- actix-web 4.5.1 -> 4.6.0
- actix-http 3.6.0 -> 3.7.0
- actix-web-static-files (commit 2d3b6160) -> 4.0.1
- tracing-actix-web 0.7.9 -> 0.7.10
- brotli 3.4.0 -> 6.0.0

## Related issue
Fixes #4625 


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-05 07:40:35 +00:00
8412665957 Update actix-web 4.5.1 -> 4.6.0 2024-06-04 09:54:30 +02:00
fc584f1db3 Merge #4666
4666: Add a score threshold search parameter r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4609

## What does this PR do?
- See [usage](https://meilisearch.notion.site/Filter-by-score-usage-224a183ce7b24ca99b6a9a8da755668a?pvs=25#95b76ded400342ba9ab3d67c734836f0) and [the known limitation](https://meilisearch.notion.site/Filter-by-score-usage-224a183ce7b24ca99b6a9a8da755668a?pvs=25#e4e32195bf0e4195b5daecdbb7a97a17)
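
Conceptually, the new `rankingScoreThreshold` parameter drops hits whose normalized ranking score falls below the given threshold (a float between `0.0` and `1.0`). A simplified sketch of that semantics, with hypothetical types rather than milli's actual search pipeline:

```rust
struct Hit {
    ranking_score: f64, // normalized ranking score in [0.0, 1.0]
}

/// Keep only the hits whose ranking score reaches the threshold.
fn apply_threshold(hits: Vec<Hit>, threshold: Option<f64>) -> Vec<Hit> {
    match threshold {
        Some(t) => hits.into_iter().filter(|hit| hit.ranking_score >= t).collect(),
        None => hits,
    }
}
```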


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-03 08:42:44 +00:00
2b6db6541e Changes after review 2024-06-03 10:30:00 +02:00
d6bd88ce4f Merge #4667
4667: Frequency matching strategy r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #3773

## What does this PR do?
- add test for matching strategy
- implement frequency matching strategy

See the [PRD for more details](https://www.notion.so/meilisearch/Frequency-Matching-Strategy-0f3ba08833a442a39590a53a1505ab00).

[Public API](https://www.notion.so/meilisearch/frequency-matching-strategy-89868fb7fc584026bc56e378eb854a7f).
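
A rough illustration of the strategy (not the actual milli implementation): when not all query words can match, the words that appear in the most documents are dropped first, since they are the least discriminating.

```rust
/// Under the `frequency` matching strategy, when query words must be dropped,
/// the most frequent (least discriminating) words are removed first.
fn removal_order<'a>(query_words: &[(&'a str, u64)]) -> Vec<&'a str> {
    let mut words = query_words.to_vec();
    // Sort by descending document frequency (the second tuple field).
    words.sort_by(|a, b| b.1.cmp(&a.1));
    words.into_iter().map(|(word, _frequency)| word).collect()
}
```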


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-05-30 14:53:31 +00:00
c2fb7afe59 fmt 2024-05-30 12:06:46 +02:00
3f1a510069 Add tests and fix matching strategy 2024-05-30 12:02:42 +02:00
41976b82b1 Tests for ranking_score_threshold 2024-05-30 11:22:26 +02:00
c36410fcbf Analytics for ranking score threshold 2024-05-30 11:22:12 +02:00
7ce2691374 Add ranking score threshold to similar API 2024-05-30 11:21:31 +02:00
4f03b0cf5b Add ranking score threshold to similar 2024-05-30 11:20:50 +02:00
c26db7878c Expose rankingScoreThreshold in API 2024-05-30 10:32:35 +02:00
06a9803544 Merge #4664
4664: Update README.md r=curquiza a=tpayet

Add hybrid & semantic as a feature

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Thomas Payet <thomas@meilisearch.com>
2024-05-29 16:55:20 +00:00
b2588d8101 Update README.md
Add hybrid & semantic as a feature
2024-05-29 17:48:48 +02:00
62d27172f4 Merge #4663
4663: Bring back release v1.8.1 into main r=ManyTheFish a=ManyTheFish



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: ManyTheFish <ManyTheFish@users.noreply.github.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2024-05-29 14:47:38 +00:00
1ab88e10b9 Merge branch 'main' into merge-release-v1.8.1-in-main 2024-05-29 16:24:00 +02:00
6a4b2516aa WIP 2024-05-29 16:21:24 +02:00
aac1d769a7 Add ranking_score_threshold to milli 2024-05-29 14:17:09 +02:00
abdc4afcca Implement Frequency matching strategy 2024-05-29 13:59:08 +02:00
75d5c0ae1f Merge #4647
4647: Feature: get similar documents r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4610 

## What does this PR do?
[Usage](https://meilisearch.notion.site/Get-similar-documents-usage-540919ca755c4da0b7cdee273db3f290)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-05-29 11:42:23 +00:00
a88554216a Merge #4657
4657: Update version for the next release (v1.9.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2024-05-29 11:14:19 +00:00
2cf3e1c80a Temporarily ignore perform snapshot test under Windows 2024-05-29 12:42:47 +02:00
e1fbfde6c4 Merge branch 'main' into merge-release-v1.8.1-in-main 2024-05-29 11:31:03 +02:00
27b75ec648 merge main into v1.8.1 2024-05-29 11:26:07 +02:00
07fdb081a4 Update version for the next release (v1.9.0) in Cargo.toml 2024-05-28 14:19:40 +00:00
ba75d23bfe Merge #4648
4648: Update version for the next release (v1.8.1) in Cargo.toml r=ManyTheFish a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: ManyTheFish <ManyTheFish@users.noreply.github.com>
2024-05-21 16:38:36 +00:00
7fbb3bf8e8 Update version for the next release (v1.8.1) in Cargo.toml 2024-05-21 15:13:03 +00:00
9066a446a3 Merge #4642
4642: Index the _geo fields when changing the setting while there are already documents in the DB r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4640
Fixes https://github.com/meilisearch/meilisearch/issues/4628

## What does this PR do?
- Add an integration test that first indexes the document and then changes the settings
- Fix `extract_geo_point` by detecting whether the `_geo` field became faceted in this settings change and, if so, indexing all documents
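
A simplified sketch of the detection described above (names are illustrative, not milli's actual API): compare the old and new faceted field sets and trigger geo-point extraction for all documents when `_geo` becomes faceted.

```rust
use std::collections::HashSet;

/// Returns true when this settings change newly makes `_geo` a faceted field,
/// meaning the geo points of all existing documents must be (re)indexed.
fn must_reindex_geo(old_faceted: &HashSet<String>, new_faceted: &HashSet<String>) -> bool {
    !old_faceted.contains("_geo") && new_faceted.contains("_geo")
}
```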

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-05-21 13:16:11 +00:00
f762307838 Fix clippy 2024-05-21 13:44:20 +02:00
3e94a90722 Fixes 2024-05-21 13:39:46 +02:00
fc7e817221 Index geo points based on the settings differences 2024-05-20 12:27:26 +02:00
0f78703b85 add a test reproducing the bug 2024-05-20 10:58:08 +02:00
53 changed files with 2420 additions and 536 deletions

338
Cargo.lock generated
View File

@ -36,9 +36,9 @@ dependencies = [
[[package]]
name = "actix-http"
version = "3.6.0"
version = "3.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743"
checksum = "4eb9843d84c775696c37d9a418bbb01b932629d01870722c0f13eb3f95e2536d"
dependencies = [
"actix-codec",
"actix-rt",
@ -46,7 +46,7 @@ dependencies = [
"actix-tls",
"actix-utils",
"ahash",
"base64 0.21.7",
"base64 0.22.1",
"bitflags 2.5.0",
"brotli",
"bytes",
@ -85,13 +85,15 @@ dependencies = [
[[package]]
name = "actix-router"
version = "0.5.1"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d66ff4d247d2b160861fa2866457e85706833527840e4133f8f49aa423a38799"
checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8"
dependencies = [
"bytestring",
"cfg-if",
"http 0.2.11",
"regex",
"regex-lite",
"serde",
"tracing",
]
@ -138,9 +140,9 @@ dependencies = [
[[package]]
name = "actix-tls"
version = "3.3.0"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f"
checksum = "ac453898d866cdbecdbc2334fe1738c747b4eba14a677261f2b768ba05329389"
dependencies = [
"actix-rt",
"actix-service",
@ -167,9 +169,9 @@ dependencies = [
[[package]]
name = "actix-web"
version = "4.5.1"
version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984"
checksum = "b1cf67dadb19d7c95e5a299e2dda24193b89d5d4f33a3b9800888ede9e19aa32"
dependencies = [
"actix-codec",
"actix-http",
@ -196,7 +198,7 @@ dependencies = [
"mime",
"once_cell",
"pin-project-lite",
"regex",
"regex-lite",
"serde",
"serde_json",
"serde_urlencoded",
@ -220,8 +222,9 @@ dependencies = [
[[package]]
name = "actix-web-static-files"
version = "3.0.5"
source = "git+https://github.com/kilork/actix-web-static-files.git?rev=2d3b6160#2d3b6160f0de4ba061c5d76b5704f34fb677f6df"
version = "4.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adf6d1ef6d7a60e084f9e0595e2a5234abda14e76c105ecf8e2d0e8800c41a1f"
dependencies = [
"actix-web",
"derive_more",
@ -500,7 +503,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "benchmarks"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"anyhow",
"bytes",
@ -613,9 +616,9 @@ dependencies = [
[[package]]
name = "brotli"
version = "3.4.0"
version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f"
checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
@ -624,9 +627,9 @@ dependencies = [
[[package]]
name = "brotli-decompressor"
version = "2.5.1"
version = "4.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
@ -645,7 +648,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"anyhow",
"time",
@ -895,9 +898,9 @@ dependencies = [
[[package]]
name = "charabia"
version = "0.8.10"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4"
checksum = "11a09ae38cfcc153f01576c3f579dfd916e0320f1b474f298c8d680b2dd92eb6"
dependencies = [
"aho-corasick",
"cow-utils",
@ -986,7 +989,7 @@ dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
"strsim 0.10.0",
]
[[package]]
@ -1277,12 +1280,12 @@ dependencies = [
[[package]]
name = "darling"
version = "0.20.3"
version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e"
checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1"
dependencies = [
"darling_core 0.20.3",
"darling_macro 0.20.3",
"darling_core 0.20.9",
"darling_macro 0.20.9",
]
[[package]]
@ -1295,21 +1298,21 @@ dependencies = [
"ident_case",
"proc-macro2",
"quote",
"strsim",
"strsim 0.10.0",
"syn 1.0.109",
]
[[package]]
name = "darling_core"
version = "0.20.3"
version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621"
checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"strsim 0.11.1",
"syn 2.0.60",
]
@ -1326,11 +1329,11 @@ dependencies = [
[[package]]
name = "darling_macro"
version = "0.20.3"
version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178"
dependencies = [
"darling_core 0.20.3",
"darling_core 0.20.9",
"quote",
"syn 2.0.60",
]
@ -1383,6 +1386,15 @@ dependencies = [
"derive_builder_macro 0.13.1",
]
[[package]]
name = "derive_builder"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
dependencies = [
"derive_builder_macro 0.20.0",
]
[[package]]
name = "derive_builder_core"
version = "0.12.0"
@ -1407,6 +1419,18 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "derive_builder_core"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
dependencies = [
"darling 0.20.9",
"proc-macro2",
"quote",
"syn 2.0.60",
]
[[package]]
name = "derive_builder_macro"
version = "0.12.0"
@ -1427,6 +1451,16 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "derive_builder_macro"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
dependencies = [
"derive_builder_core 0.20.0",
"syn 2.0.60",
]
[[package]]
name = "derive_more"
version = "0.99.17"
@ -1454,7 +1488,7 @@ dependencies = [
"serde-cs",
"serde_json",
"serde_urlencoded",
"strsim",
"strsim 0.10.0",
]
[[package]]
@ -1545,7 +1579,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"anyhow",
"big_s",
@ -1707,29 +1741,6 @@ dependencies = [
"syn 2.0.60",
]
[[package]]
name = "env_filter"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
dependencies = [
"log",
"regex",
]
[[package]]
name = "env_logger"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
dependencies = [
"anstream",
"anstyle",
"env_filter",
"humantime",
"log",
]
[[package]]
name = "equivalent"
version = "1.0.1"
@ -1784,7 +1795,7 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5"
dependencies = [
"darling 0.20.3",
"darling 0.20.9",
"proc-macro2",
"quote",
"syn 2.0.60",
@ -1793,7 +1804,7 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"faux",
"tempfile",
@ -1816,7 +1827,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"insta",
"nom",
@ -1836,7 +1847,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"criterion",
"serde_json",
@ -1954,7 +1965,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"arbitrary",
"clap",
@ -2379,12 +2390,6 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "hyper"
version = "0.14.27"
@ -2447,7 +2452,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]]
name = "index-scheduler"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"anyhow",
"big_s",
@ -2642,7 +2647,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"criterion",
"serde_json",
@ -2778,9 +2783,9 @@ dependencies = [
[[package]]
name = "lindera"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1bbf252ea3490053dc397539ece0b510924f2f72605fa28d3e858d86f43ec88"
checksum = "dcd4fa369654517f72c10b24adf03ad4ce69d19facb79c3cb3cf9b4580ac352f"
dependencies = [
"lindera-analyzer",
"lindera-core",
@ -2791,9 +2796,9 @@ dependencies = [
[[package]]
name = "lindera-analyzer"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87febfec0e2859ce2154fb90dd6f66b774ddb0b6e264b44f8e3d1303c9dcedd7"
checksum = "c2cba7fe275cb8ec4c594cfee9cc39e48b71e02a089457d52f3e70dc146a8133"
dependencies = [
"anyhow",
"bincode",
@ -2821,9 +2826,9 @@ dependencies = [
[[package]]
name = "lindera-cc-cedict"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb91bb8a93ab0f95dbc3c43b5105354bb059134ef731154f75a64b5d919e71d"
checksum = "240adf9faba3f09ad16557aefcd316dd00ebb940ac94334a629660d772f118c1"
dependencies = [
"bincode",
"byteorder",
@ -2835,29 +2840,21 @@ dependencies = [
[[package]]
name = "lindera-cc-cedict-builder"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6022a8309a287dbef425fd09a61585351670c83001d74f6c089979e2330b683"
checksum = "f12241f9e74babe708a0b9441d9f3fa67cb29fd01257918f30ffd480ca568820"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"yada",
"lindera-dictionary-builder",
]
[[package]]
name = "lindera-compress"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32363cbcf433f915e7d77c2a0c410db2d6b23442e80715cf2cf6b9864078a500"
checksum = "50f9f7a858d70ff9e4383cbd507ca9e98c8faf0319e08c10df4c30cb58c9ca6c"
dependencies = [
"anyhow",
"flate2",
@ -2866,9 +2863,9 @@ dependencies = [
[[package]]
name = "lindera-core"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9a0e858753a02b1a3524fae4fbb11ca4b3a947128fd7854b797386562678be8"
checksum = "7f09810ab98ce2a084d788ac38fbb7b31697f34bc47c61de0d880320a674bd15"
dependencies = [
"anyhow",
"bincode",
@ -2883,9 +2880,9 @@ dependencies = [
[[package]]
name = "lindera-decompress"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e406345f6f8b665b9a129c67079c18ca9d97e9d171d102b4106a64a592c285e"
checksum = "d53400c9b2dd6b45f82d9fa5b5efe079f3acaf6ce609dba8d42c8a76baaa2b12"
dependencies = [
"anyhow",
"flate2",
@ -2894,9 +2891,9 @@ dependencies = [
[[package]]
name = "lindera-dictionary"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2a3ec0e5fd6768a27c6ec1040e8470d3a5926418f7afe065859e98aabb3bfe"
checksum = "2053d064a515839250438b8dfa6cf445e2b97633232ded34a54f267e945d196e"
dependencies = [
"anyhow",
"bincode",
@ -2918,10 +2915,32 @@ dependencies = [
]
[[package]]
name = "lindera-filter"
version = "0.30.0"
name = "lindera-dictionary-builder"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1badaf51bad051185ea4917ba91bbbf2d6f8167e155647e21e0eaaef0982a95d"
checksum = "14f486924055f8bedcc5877572e4dc91fbc10370862430ac2e5f7f0d671a18c8"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"derive_builder 0.20.0",
"encoding",
"encoding_rs",
"encoding_rs_io",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"yada",
]
[[package]]
name = "lindera-filter"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb3904fc279f0297f6fd6210435adab1f8c82ba84eba8635407c791af51c0d8a"
dependencies = [
"anyhow",
"csv",
@ -2944,9 +2963,9 @@ dependencies = [
[[package]]
name = "lindera-ipadic"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "129ec16366354998f9791467ad38731539197747f649e573ead845358271ce25"
checksum = "4aa3ef2f1f6838b0fa2e2fca2896242bb83bc877c1760cdb6fa23449ab95d664"
dependencies = [
"bincode",
"byteorder",
@ -2958,31 +2977,21 @@ dependencies = [
[[package]]
name = "lindera-ipadic-builder"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f0979a56bc57e9c9be2996dff232c47aa146a2e7baebf5dd567e388eba3dd90"
checksum = "a41287db18eadb58d73a04d49778d41c161549fbbbe155d4338976b7b8541c7d"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"serde",
"yada",
"lindera-dictionary-builder",
]
[[package]]
name = "lindera-ipadic-neologd"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20076660c4e79ef0316735b44e18ec7644e54786acdee8946c972d5f97086d0f"
checksum = "49382256f245078400bf7e72663f9eb30afcd9ed54cd46f29d7db1be529678e1"
dependencies = [
"bincode",
"byteorder",
@ -2994,31 +3003,21 @@ dependencies = [
[[package]]
name = "lindera-ipadic-neologd-builder"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eccd18ed5f65d1d64ac0cbfa1d6827bfbbaf6530520ae6847e6a91ee38f47e20"
checksum = "5ae9cfd2fda68ef526ef0c7b50c5d4d5582a4daa6ecd0cea9e2b0b62564a2a5d"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"serde",
"yada",
"lindera-dictionary-builder",
]
[[package]]
name = "lindera-ko-dic"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59073171566c3e498ca048e84c2d0a7e117a42f36c8eb7d7163e65ac38bd6d48"
checksum = "7f86d03a863f3ae1d269e7b7d4dd2cce9385a53463479bafc5d7aa48719f36db"
dependencies = [
"bincode",
"byteorder",
@ -3034,29 +3033,21 @@ dependencies = [
[[package]]
name = "lindera-ko-dic-builder"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae176afa8535ca2a5ee9471873f85d531db0a6c32a3c42b41084506aac22b577"
checksum = "bd0f44f2e56358c5879dfb5e7f76cc6ba7853ec31082c4e3f8fb65fb2d849c51"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"yada",
"lindera-dictionary-builder",
]
[[package]]
name = "lindera-tokenizer"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "457285bdde84571aa510c9e05371904305a55e8a541fa1473d4393062f06932d"
checksum = "7c5182735cdc2832ac757b31e8a5b150a3514357a30efe3dec212f8dcb06ba14"
dependencies = [
"bincode",
"lindera-core",
@ -3068,9 +3059,9 @@ dependencies = [
[[package]]
name = "lindera-unidic"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5839980be552dfa639b70964c61914a9ad014148663679b0e148aa72e5e30f23"
checksum = "6c63da104728dd1cf14bfa564753cbfa996f6078ed2e23e31475bd1d639fc597"
dependencies = [
"bincode",
"byteorder",
@ -3086,22 +3077,14 @@ dependencies = [
[[package]]
name = "lindera-unidic-builder"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcaab8f061d5b944b1e424f49c7efbf8f276e8a72e4f4ff956d01e46d481f008"
checksum = "04acecbc068dac21766a1b7ed1f2608b6f250d10b4f8bff67abc2a00437a0974"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"yada",
"lindera-dictionary-builder",
]
[[package]]
@ -3272,7 +3255,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"insta",
"md5",
@ -3281,7 +3264,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"actix-cors",
"actix-http",
@ -3373,7 +3356,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"base64 0.21.7",
"enum-iterator",
@ -3392,7 +3375,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"actix-web",
"anyhow",
@ -3422,7 +3405,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"anyhow",
"clap",
@ -3461,7 +3444,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"arroy",
"big_s",
@ -3901,7 +3884,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"big_s",
"serde_json",
@ -4340,6 +4323,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-lite"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"
[[package]]
name = "regex-syntax"
version = "0.8.2"
@ -4388,12 +4377,6 @@ dependencies = [
"winreg",
]
[[package]]
name = "retain_mut"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"
[[package]]
name = "ring"
version = "0.17.8"
@ -4411,13 +4394,12 @@ dependencies = [
[[package]]
name = "roaring"
version = "0.10.2"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873"
checksum = "7699249cc2c7d71939f30868f47e9d7add0bdc030d90ee10bfd16887ff8bb1c8"
dependencies = [
"bytemuck",
"byteorder",
"retain_mut",
"serde",
]
@ -4900,6 +4882,12 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "strum"
version = "0.26.2"
@ -5313,9 +5301,9 @@ dependencies = [
[[package]]
name = "tracing-actix-web"
version = "0.7.9"
version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78"
checksum = "fa069bd1503dd526ee793bb3fce408895136c95fc86d2edb2acf1c646d7f0684"
dependencies = [
"actix-web",
"mutually_exclusive_features",
@ -6052,7 +6040,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.8.0"
version = "1.9.0"
dependencies = [
"anyhow",
"build-info",

View File

@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.8.0"
version = "1.9.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@ -25,7 +25,7 @@
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
<p align="center" name="demo">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
@ -39,8 +39,8 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results
- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
@ -55,15 +55,15 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
## 📖 Documentation
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
You can consult Meilisearch's documentation at [meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
## 🚀 Getting started
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
## Supercharge your Meilisearch experience
## 🌍 Supercharge your Meilisearch experience
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Additional features include analytics & monitoring in many regions around the world. No credit card is required.
## 🧰 SDKs & integration tools
@ -85,13 +85,13 @@ Finally, for more in-depth information, refer to our articles explaining fundame
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.
## 📫 Get in touch!
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
Meilisearch is a search engine created by [Meili]([https://www.welcometothejungle.com/en/companies/meilisearch](https://www.meilisearch.com/careers)), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.

View File

@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.5.1", default-features = false }
actix-web = { version = "4.6.0", default-features = false }
anyhow = "1.0.79"
convert_case = "0.6.0"
csv = "1.3.0"
@ -30,7 +30,12 @@ serde_json = "1.0.111"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] }

View File

@ -189,4 +189,6 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarId);

View File

@ -241,6 +241,8 @@ InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
InvalidSimilarRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
@ -505,6 +507,21 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
}
}
impl fmt::Display for deserr_codes::InvalidSearchRankingScoreThreshold {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`."
)
}
}
impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
deserr_codes::InvalidSearchRankingScoreThreshold.fmt(f)
}
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {

View File

@ -14,20 +14,20 @@ default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.6.0", default-features = false, features = [
actix-http = { version = "3.7.0", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_21",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.5.1", default-features = false, features = [
actix-web = { version = "4.6.0", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_21",
] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
actix-web-static-files = { version = "4.0.1", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.77"
@ -105,13 +105,13 @@ url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
tracing-actix-web = "0.7.10"
build-info = { version = "1.7.0", path = "../build-info" }
[dev-dependencies]
actix-rt = "2.9.0"
assert-json-diff = "2.0.2"
brotli = "3.4.0"
brotli = "6.0.0"
insta = "1.34.0"
manifest-dir-macros = "0.1.18"
maplit = "1.0.2"

View File

@ -648,6 +648,7 @@ pub struct SearchAggregator {
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
}
impl SearchAggregator {
@ -676,6 +677,7 @@ impl SearchAggregator {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@ -748,6 +750,7 @@ impl SearchAggregator {
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
@ -821,6 +824,7 @@ impl SearchAggregator {
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
} = other;
if self.timestamp.is_none() {
@ -904,6 +908,7 @@ impl SearchAggregator {
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@ -945,6 +950,7 @@ impl SearchAggregator {
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
} = self;
if total_received == 0 {
@ -1015,6 +1021,7 @@ impl SearchAggregator {
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
});
@ -1087,6 +1094,7 @@ impl MultiSearchAggregator {
matching_strategy: _,
attributes_to_search_on: _,
hybrid: _,
ranking_score_threshold: _,
} = query;
index_uid.as_str()
@ -1234,6 +1242,7 @@ impl FacetSearchAggregator {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@ -1248,7 +1257,8 @@ impl FacetSearchAggregator {
|| filter.is_some()
|| *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some()
|| hybrid.is_some();
|| hybrid.is_some()
|| ranking_score_threshold.is_some();
ret
}
@ -1624,6 +1634,7 @@ pub struct SimilarAggregator {
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
}
impl SimilarAggregator {
@ -1638,6 +1649,7 @@ impl SimilarAggregator {
show_ranking_score,
show_ranking_score_details,
filter,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@ -1675,6 +1687,7 @@ impl SimilarAggregator {
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
ret.embedder = embedder.is_some();
@ -1708,6 +1721,7 @@ impl SimilarAggregator {
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
} = other;
if self.timestamp.is_none() {
@ -1749,6 +1763,7 @@ impl SimilarAggregator {
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@ -1769,6 +1784,7 @@ impl SimilarAggregator {
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
} = self;
if total_received == 0 {
@ -1808,6 +1824,7 @@ impl SimilarAggregator {
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
});

View File

@ -14,8 +14,8 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::search_queue::SearchQueue;
@ -46,6 +46,8 @@ pub struct FacetSearchQuery {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
pub async fn search(
@ -103,6 +105,7 @@ impl From<FacetSearchQuery> for SearchQuery {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = value;
SearchQuery {
@ -128,6 +131,7 @@ impl From<FacetSearchQuery> for SearchQuery {
vector,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
}
}
}

View File

@ -19,9 +19,10 @@ use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery,
SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;
@ -82,6 +83,21 @@ pub struct SearchQueryGet {
pub hybrid_embedder: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThreshold);
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
type Error = InvalidSearchRankingScoreThreshold;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?;
Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?))
}
}
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
@ -152,6 +168,7 @@ impl From<SearchQueryGet> for SearchQuery {
matching_strategy: other.matching_strategy,
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
}
}
}

View File

@ -6,8 +6,8 @@ use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::{
InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId,
InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarShowRankingScore,
InvalidSimilarShowRankingScoreDetails,
InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarRankingScoreThreshold,
InvalidSimilarShowRankingScore, InvalidSimilarShowRankingScoreDetails,
};
use meilisearch_types::error::{ErrorCode as _, ResponseError};
use meilisearch_types::index_uid::IndexUid;
@ -21,8 +21,8 @@ use crate::analytics::{Analytics, SimilarAggregator};
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
add_search_rules, perform_similar, SearchKind, SimilarQuery, SimilarResult,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
add_search_rules, perform_similar, RankingScoreThresholdSimilar, SearchKind, SimilarQuery,
SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
@ -42,9 +42,7 @@ pub async fn similar_get(
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.0.try_into().map_err(|code: InvalidSimilarId| {
ResponseError::from_msg(code.to_string(), code.error_code())
})?;
let query = params.0.try_into()?;
let mut aggregate = SimilarAggregator::from_query(&query, &req);
@ -130,12 +128,27 @@ pub struct SimilarQueryGet {
show_ranking_score: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
type Error = InvalidSimilarRankingScoreThreshold;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?))
}
}
impl TryFrom<SimilarQueryGet> for SimilarQuery {
type Error = InvalidSimilarId;
type Error = ResponseError;
fn try_from(
SimilarQueryGet {
@ -147,6 +160,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
}: SimilarQueryGet,
) -> Result<Self, Self::Error> {
let filter = match filter {
@ -158,7 +172,9 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
};
Ok(SimilarQuery {
id: id.0.try_into()?,
id: id.0.try_into().map_err(|code: InvalidSimilarId| {
ResponseError::from_msg(code.to_string(), code.error_code())
})?,
offset: offset.0,
limit: limit.0,
filter,
@ -166,6 +182,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
show_ranking_score: show_ranking_score.0,
show_ranking_score_details: show_ranking_score_details.0,
ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
})
}
}

View File

@ -87,6 +87,44 @@ pub struct SearchQuery {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThreshold(f64);
impl std::convert::TryFrom<f64> for RankingScoreThreshold {
type Error = InvalidSearchRankingScoreThreshold;
fn try_from(f: f64) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSearchRankingScoreThreshold)
} else {
Ok(RankingScoreThreshold(f))
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdSimilar(f64);
impl std::convert::TryFrom<f64> for RankingScoreThresholdSimilar {
type Error = InvalidSimilarRankingScoreThreshold;
fn try_from(f: f64) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSimilarRankingScoreThreshold)
} else {
Ok(Self(f))
}
}
}
// Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum.
@ -117,6 +155,7 @@ impl fmt::Debug for SearchQuery {
crop_marker,
matching_strategy,
attributes_to_search_on,
ranking_score_threshold,
} = self;
let mut debug = f.debug_struct("SearchQuery");
@ -188,6 +227,9 @@ impl fmt::Debug for SearchQuery {
debug.field("highlight_pre_tag", &highlight_pre_tag);
debug.field("highlight_post_tag", &highlight_post_tag);
debug.field("crop_marker", &crop_marker);
if let Some(ranking_score_threshold) = ranking_score_threshold {
debug.field("ranking_score_threshold", &ranking_score_threshold);
}
debug.finish()
}
@ -356,6 +398,8 @@ pub struct SearchQueryWithIndex {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
impl SearchQueryWithIndex {
@ -384,6 +428,7 @@ impl SearchQueryWithIndex {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = self;
(
index_uid,
@ -410,6 +455,7 @@ impl SearchQueryWithIndex {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@ -436,6 +482,8 @@ pub struct SimilarQuery {
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
}
#[derive(Debug, Clone, PartialEq, Deserr)]
@ -477,6 +525,8 @@ pub enum MatchingStrategy {
Last,
/// All query words are mandatory
All,
/// Remove query words from the most frequent to the least
Frequency,
}
impl Default for MatchingStrategy {
@ -490,6 +540,7 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
match other {
MatchingStrategy::Last => Self::Last,
MatchingStrategy::All => Self::All,
MatchingStrategy::Frequency => Self::Frequency,
}
}
}
@ -661,6 +712,9 @@ fn prepare_search<'t>(
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn);
search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
search.ranking_score_threshold(ranking_score_threshold.0);
}
match search_kind {
SearchKind::KeywordOnly => {
@ -702,11 +756,16 @@ fn prepare_search<'t>(
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
});
search.scoring_strategy(
if query.show_ranking_score
|| query.show_ranking_score_details
|| query.ranking_score_threshold.is_some()
{
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
},
);
// compute the offset on the limit depending on the pagination mode.
let (offset, limit) = if is_finite_pagination {
@ -784,10 +843,6 @@ pub fn perform_search(
let SearchQuery {
q,
vector: _,
hybrid: _,
// already computed from prepare_search
offset: _,
limit,
page,
hits_per_page,
@ -798,14 +853,19 @@ pub fn perform_search(
show_matches_position,
show_ranking_score,
show_ranking_score_details,
filter: _,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
// already used in prepare_search
vector: _,
hybrid: _,
offset: _,
ranking_score_threshold: _,
matching_strategy: _,
attributes_to_search_on: _,
filter: _,
} = query;
let format = AttributesFormat {
@ -1067,6 +1127,7 @@ pub fn perform_similar(
attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
ranking_score_threshold,
} = query;
// using let-else rather than `?` so that the borrow checker identifies we're always returning here,
@ -1090,6 +1151,10 @@ pub fn perform_similar(
}
}
if let Some(ranking_score_threshold) = ranking_score_threshold {
similar.ranking_score_threshold(ranking_score_threshold.0);
}
let milli::SearchResult {
documents_ids,
matching_words: _,

View File

@ -40,8 +40,9 @@ pub struct Permit {
impl Drop for Permit {
fn drop(&mut self) {
let sender = self.sender.clone();
// if the channel is closed then the whole instance is down
let _ = futures::executor::block_on(self.sender.send(()));
std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
}
}
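
The switch away from `block_on` matters because `Drop` can run on a Tokio worker thread, where blocking on an async send can stall or deadlock the runtime. A rough sketch of the resulting pattern, with hypothetical types rather than the exact Meilisearch ones, assuming the drop always happens while a Tokio runtime is running:

```rust
use tokio::sync::mpsc::Sender;

struct Permit {
    sender: Sender<()>,
}

impl Drop for Permit {
    fn drop(&mut self) {
        // Clone the sender so the spawned task owns it after `self` is gone.
        let sender = self.sender.clone();
        // Fire-and-forget: dropping the JoinHandle detaches the task, and a send
        // error only means the receiving side (the whole instance) is shutting down.
        drop(tokio::spawn(async move {
            let _ = sender.send(()).await;
        }));
    }
}
```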

View File

@ -321,6 +321,40 @@ async fn search_bad_facets() {
// Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings.
}
#[actix_rt::test]
async fn search_bad_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"rankingScoreThreshold": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found a string: `\"doggo\"`",
"code": "invalid_search_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn search_invalid_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_search_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn search_non_filterable_facets() {
let server = Server::new().await;
@ -505,7 +539,7 @@ async fn search_bad_matching_strategy() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`",
"message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`, `frequency`",
"code": "invalid_search_matching_strategy",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@ -527,7 +561,7 @@ async fn search_bad_matching_strategy() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`",
"message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`, `frequency`",
"code": "invalid_search_matching_strategy",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"

View File

@ -117,3 +117,69 @@ async fn geo_bounding_box_with_string_and_number() {
)
.await;
}
#[actix_rt::test]
async fn bug_4640() {
// https://github.com/meilisearch/meilisearch/issues/4640
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.update_settings_filterable_attributes(json!(["_geo"])).await;
let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
index.wait_task(ret.uid()).await;
// Sort the document with the second one first
index
.search(
json!({
"sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
},
"_geoDistance": 9714063
},
{
"id": 3,
"name": "Crêpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"###);
},
)
.await;
}

View File

@ -0,0 +1,128 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");
index.add_documents(documents.clone(), None).await;
index.wait_task(0).await;
index
}
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"id": "1",
},
{
"title": "Captain Planet",
"id": "2",
},
{
"title": "Captain Marvel",
"id": "3",
},
{
"title": "a Captain Marvel ersatz",
"id": "4"
},
{
"title": "He's not part of the Marvel Cinematic Universe",
"id": "5"
},
{
"title": "a Shazam ersatz, but better than Captain Planet",
"id": "6"
},
{
"title": "Capitain CAAAAAVEEERNE!!!!",
"id": "7"
}
])
});
#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"2"},{"id":"6"},{"id":"7"}]"###);
})
.await;
index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
})
.await;
index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}
#[actix_rt::test]
async fn search_with_typo() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"7"},{"id":"2"},{"id":"6"}]"###);
})
.await;
index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
})
.await;
index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}
#[actix_rt::test]
async fn search_with_unknown_word() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"2"},{"id":"3"},{"id":"4"},{"id":"6"},{"id":"7"}]"###);
})
.await;
index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @"[]");
})
.await;
index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}

View File

@ -7,6 +7,7 @@ mod facet_search;
mod formatted;
mod geo;
mod hybrid;
mod matching_strategy;
mod multi;
mod pagination;
mod restrict_searchable;
@ -47,6 +48,31 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
])
});
static SCORE_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
},
{
"title": "Batman Returns",
"id": "C",
},
{
"title": "Batman",
"id": "D",
},
{
"title": "Badman",
"id": "E",
}
])
});
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
@ -959,6 +985,213 @@ async fn test_score_details() {
.await;
}
#[actix_rt::test]
async fn test_score() {
let server = Server::new().await;
let index = server.index("test");
let documents = SCORE_DOCUMENTS.clone();
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index
.search(
json!({
"q": "Badman the dark knight returns 1",
"showRankingScore": true,
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.9746605609456898
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.8055252965383685
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.16666666666666666
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.07702020202020202
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.07702020202020202
}
]
"###);
},
)
.await;
}
#[actix_rt::test]
async fn test_score_threshold() {
let query = "Badman dark returns 1";
let server = Server::new().await;
let index = server.index("test");
let documents = SCORE_DOCUMENTS.clone();
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"5");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.11553030303030302
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.11553030303030302
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.2
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"3"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.5
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"2"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.8
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"1"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 1.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"0"###);
// nobody is perfect
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
},
)
.await;
}
#[actix_rt::test]
async fn test_degraded_score_details() {
let server = Server::new().await;

View File

@ -87,6 +87,68 @@ async fn similar_bad_id() {
"###);
}
#[actix_rt::test]
async fn similar_bad_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found an array: `[\"doggo\"]`",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn similar_invalid_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn similar_invalid_id() {
let server = Server::new().await;

View File

@ -194,6 +194,235 @@ async fn basic() {
.await;
}
#[actix_rt::test]
async fn ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": [
0.7,
0.7,
-0.4
]
},
"_rankingScore": 0.2819308042526245
},
{
"title": "Shazam!",
"release_year": 2019,
"id": "287947",
"_vectors": {
"manual": [
0.8,
0.4,
-0.5
]
},
"_rankingScore": 0.1662663221359253
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": [
0.7,
0.7,
-0.4
]
},
"_rankingScore": 0.2819308042526245
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @"[]");
},
)
.await;
}
#[actix_rt::test]
async fn filter() {
let server = Server::new().await;

View File

@ -31,6 +31,7 @@ macro_rules! verify_snapshot {
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn perform_snapshot() {
let temp = tempfile::tempdir().unwrap();
let snapshot_dir = tempfile::tempdir().unwrap();

View File

@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.8.10", default-features = false }
charabia = { version = "0.8.11", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11"
deserr = "0.6.1"

View File

@ -66,6 +66,7 @@ fn main() -> Result<(), Box<dyn Error>> {
&mut DefaultSearchLogger,
logger,
TimeBudget::max(),
None,
)?;
if let Some((logger, dir)) = detailed_logger {
logger.finish(&mut ctx, Path::new(dir))?;

View File

@ -47,6 +47,12 @@ pub struct FacetGroupValue {
pub bitmap: RoaringBitmap,
}
#[derive(Debug)]
pub struct FacetGroupLazyValue<'b> {
pub size: u8,
pub bitmap_bytes: &'b [u8],
}
pub struct FacetGroupKeyCodec<T> {
_phantom: PhantomData<T>,
}
@ -69,6 +75,7 @@ where
Ok(Cow::Owned(v))
}
}
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
where
T: BytesDecode<'a>,
@ -84,6 +91,7 @@ where
}
pub struct FacetGroupValueCodec;
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
type EItem = FacetGroupValue;
@ -93,11 +101,23 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
Ok(Cow::Owned(v))
}
}
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
type DItem = FacetGroupValue;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let size = bytes[0];
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
Ok(FacetGroupValue { size, bitmap })
}
}
pub struct FacetGroupLazyValueCodec;
impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec {
type DItem = FacetGroupLazyValue<'a>;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] })
}
}
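
What the lazy variant buys is easiest to see directly (a hedged sketch, assuming the codecs above are in scope): decoding only splits off the size byte and hands back the remaining bytes untouched, deferring the costly bitmap deserialization until, and unless, it is actually needed:

```rust
use heed::BytesDecode;

#[test]
fn lazy_value_defers_bitmap_decoding() {
    // One size byte followed by whatever the bitmap was serialized to.
    let mut buffer = vec![4u8];
    buffer.extend_from_slice(b"...serialized bitmap bytes...");

    let lazy = FacetGroupLazyValueCodec::bytes_decode(&buffer).unwrap();
    assert_eq!(lazy.size, 4);
    // Nothing has been deserialized yet; the bitmap is still raw bytes.
    assert_eq!(lazy.bitmap_bytes, &buffer[1..]);
}
```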

View File

@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::io;
use std::io::{self, Cursor};
use std::mem::size_of;
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
@ -57,6 +57,24 @@ impl CboRoaringBitmapCodec {
}
}
pub fn intersection_with_serialized(
mut bytes: &[u8],
other: &RoaringBitmap,
) -> io::Result<RoaringBitmap> {
// See the `deserialize_from` method above for implementation details.
if bytes.len() <= THRESHOLD * size_of::<u32>() {
let mut bitmap = RoaringBitmap::new();
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
if other.contains(integer) {
bitmap.insert(integer);
}
}
Ok(bitmap)
} else {
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
}
}
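
A rough usage sketch (assuming `CboRoaringBitmapCodec` is in scope and that a handful of values stays under the raw-u32 threshold, whose constant is not shown in this excerpt): for such small inputs the serialized form is just native-endian `u32`s, so the bytes can be built by hand and only the ids shared with the universe come back:

```rust
use byteorder::{NativeEndian, WriteBytesExt};
use roaring::RoaringBitmap;

#[test]
fn intersect_with_small_serialized_bitmap() {
    // Serialize {1, 2, 4, 8} in the small-bitmap layout: raw native-endian u32s.
    let mut bytes = Vec::new();
    for n in [1u32, 2, 4, 8] {
        bytes.write_u32::<NativeEndian>(n).unwrap();
    }

    let universe: RoaringBitmap = [2u32, 3, 4].into_iter().collect();
    let docids =
        CboRoaringBitmapCodec::intersection_with_serialized(&bytes, &universe).unwrap();

    // Only the ids present in both the serialized bitmap and the universe remain.
    let expected: RoaringBitmap = [2u32, 4].into_iter().collect();
    assert_eq!(docids, expected);
}
```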
/// Merge serialized CboRoaringBitmaps in a buffer.
///
/// if the merged values length is under the threshold, values are directly

View File

@ -38,7 +38,7 @@ where
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
Ok(())
} else {
@ -81,7 +81,7 @@ where
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
// We first fill the heap with values from the highest level
let starting_key =
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };

View File

@ -4,9 +4,11 @@ use heed::BytesEncode;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::Result;
use crate::{CboRoaringBitmapCodec, Result};
/// Find all the document ids for which the given field contains a value contained within
/// the two bounds.
@ -16,6 +18,7 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
field_id: u16,
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
universe: Option<&RoaringBitmap>,
docids: &mut RoaringBitmap,
) -> Result<()>
where
@ -46,13 +49,15 @@ where
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(starting_left_bound) =
get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?
{
let rightmost_bound =
Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
let group_size = usize::MAX;
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
Ok(())
@ -64,12 +69,16 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
field_id: u16,
left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>,
/// The subset of document ids that are useful for this search.
/// Great performance optimizations can be achieved by only fetching values matching this subset.
universe: Option<&'bitmap RoaringBitmap>,
docids: &'bitmap mut RoaringBitmap,
}
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
let left_key =
@ -104,7 +113,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
}
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
*self.docids |= value.bitmap;
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
};
}
}
Ok(())
@ -195,7 +210,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
left_condition && right_condition
};
if should_take_whole_group {
*self.docids |= &previous_value.bitmap;
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
previous_key = next_key;
previous_value = next_value;
continue;
@ -291,7 +312,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
left_condition && right_condition
};
if should_take_whole_group {
*self.docids |= &previous_value.bitmap;
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
} else {
let level = level - 1;
let starting_left_bound = previous_key.left_bound;
@ -365,6 +392,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@ -384,6 +412,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@ -418,6 +447,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@ -439,6 +469,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@ -474,6 +505,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@ -499,6 +531,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@ -537,6 +570,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@ -556,6 +590,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@ -571,6 +606,7 @@ mod tests {
0,
&Bound::Unbounded,
&Bound::Unbounded,
None,
&mut docids,
)
.unwrap();
@ -586,6 +622,7 @@ mod tests {
1,
&Bound::Unbounded,
&Bound::Unbounded,
None,
&mut docids,
)
.unwrap();
@ -621,6 +658,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@ -634,6 +672,7 @@ mod tests {
1,
&start,
&end,
None,
&mut docids,
)
.unwrap();

View File

@ -36,7 +36,7 @@ pub fn ascending_facet_sort<'t>(
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);

View File

@ -19,9 +19,9 @@ pub fn descending_facet_sort<'t>(
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(itertools::Either::Left(DescendingFacetSort {

View File

@ -4,7 +4,7 @@ use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
use roaring::RoaringBitmap;
use roaring::{MultiOps, RoaringBitmap};
use serde_json::Value;
use super::facet_range_search;
@ -224,14 +224,14 @@ impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let filterable_fields = index.filterable_fields(rtxn)?;
self.inner_evaluate(rtxn, index, &filterable_fields)
self.inner_evaluate(rtxn, index, &filterable_fields, None)
}
fn evaluate_operator(
rtxn: &heed::RoTxn,
index: &Index,
field_id: FieldId,
universe: Option<&RoaringBitmap>,
operator: &Condition<'a>,
) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
@ -291,14 +291,22 @@ impl<'a> Filter<'a> {
}
Condition::NotEqual(val) => {
let operator = Condition::Equal(val.clone());
let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?;
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
let all_ids = index.documents_ids(rtxn)?;
return Ok(all_ids - docids);
}
};
let mut output = RoaringBitmap::new();
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?;
Self::explore_facet_number_levels(
rtxn,
numbers_db,
field_id,
left,
right,
universe,
&mut output,
)?;
Ok(output)
}
@ -310,6 +318,7 @@ impl<'a> Filter<'a> {
field_id: FieldId,
left: Bound<f64>,
right: Bound<f64>,
universe: Option<&RoaringBitmap>,
output: &mut RoaringBitmap,
) -> Result<()> {
match (left, right) {
@ -321,7 +330,7 @@ impl<'a> Filter<'a> {
(_, _) => (),
}
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
rtxn, db, field_id, &left, &right, output,
rtxn, db, field_id, &left, &right, universe, output,
)?;
Ok(())
@ -332,31 +341,37 @@ impl<'a> Filter<'a> {
rtxn: &heed::RoTxn,
index: &Index,
filterable_fields: &HashSet<String>,
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
if universe.map_or(false, |u| u.is_empty()) {
return Ok(RoaringBitmap::new());
}
match &self.condition {
FilterCondition::Not(f) => {
let all_ids = index.documents_ids(rtxn)?;
let selected = Self::inner_evaluate(
&(f.as_ref().clone()).into(),
rtxn,
index,
filterable_fields,
universe,
)?;
Ok(all_ids - selected)
match universe {
Some(universe) => Ok(universe - selected),
None => {
let all_ids = index.documents_ids(rtxn)?;
Ok(all_ids - selected)
}
}
}
FilterCondition::In { fid, els } => {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
let mut bitmap = RoaringBitmap::new();
for el in els {
let op = Condition::Equal(el.clone());
let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?;
bitmap |= el_bitmap;
}
Ok(bitmap)
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
.union()
} else {
Ok(RoaringBitmap::new())
}
@ -371,7 +386,7 @@ impl<'a> Filter<'a> {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
Self::evaluate_operator(rtxn, index, fid, op)
Self::evaluate_operator(rtxn, index, fid, universe, op)
} else {
Ok(RoaringBitmap::new())
}
@ -382,14 +397,11 @@ impl<'a> Filter<'a> {
}))?
}
}
FilterCondition::Or(subfilters) => {
let mut bitmap = RoaringBitmap::new();
for f in subfilters {
bitmap |=
Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
}
Ok(bitmap)
}
FilterCondition::Or(subfilters) => subfilters
.iter()
.cloned()
.map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe))
.union(),
FilterCondition::And(subfilters) => {
let mut subfilters_iter = subfilters.iter();
if let Some(first_subfilter) = subfilters_iter.next() {
@ -398,16 +410,21 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
for f in subfilters_iter {
if bitmap.is_empty() {
return Ok(bitmap);
}
// TODO: We are doing the intersection twice,
// which could be more efficient.
// Can't we just replace this `&=` with an `=`?
bitmap &= Self::inner_evaluate(
&(f.clone()).into(),
rtxn,
index,
filterable_fields,
Some(&bitmap),
)?;
}
Ok(bitmap)
@ -507,6 +524,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
let geo_lng_token = Token::new(
@ -539,6 +557,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
let condition_right = FilterCondition::Condition {
@ -552,6 +571,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
left | right
@ -567,6 +587,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?
};
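
Stripped of the milli specifics, the AND evaluation above threads the shrinking result set into each subsequent clause as its universe and bails out as soon as it becomes empty. A standalone sketch of that control flow (plain `roaring` and hypothetical evaluator closures, not the actual `Filter` API):

```rust
use roaring::RoaringBitmap;

/// Evaluate an AND of clauses, letting each clause restrict its work
/// to the documents that previous clauses already kept.
fn evaluate_and<F>(clauses: &[F]) -> RoaringBitmap
where
    F: Fn(Option<&RoaringBitmap>) -> RoaringBitmap,
{
    let mut clauses = clauses.iter();
    // Degenerate case in this sketch: no clauses, nothing matches.
    let Some(first) = clauses.next() else {
        return RoaringBitmap::new();
    };
    // The first clause has no universe yet; its result becomes the universe.
    let mut bitmap = first(None);
    for clause in clauses {
        if bitmap.is_empty() {
            // Nothing can match anymore: skip the remaining clauses entirely.
            return bitmap;
        }
        // Later clauses only need to inspect documents still in `bitmap`.
        bitmap &= clause(Some(&bitmap));
    }
    bitmap
}
```

Evaluating the most selective clause first makes this universe shrink sooner, which is why the order of AND clauses affects filter latency.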

View File

@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter};
pub use self::search::{FacetValueHit, SearchForFacetValues};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result};
@ -54,9 +54,9 @@ pub fn facet_max_value<'t>(
}
/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -78,9 +78,9 @@ where
}
/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -102,9 +102,9 @@ where
}
/// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t>(
pub(crate) fn get_highest_level<'t, DC>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes();

View File

@ -169,6 +169,7 @@ impl<'a> Search<'a> {
index: self.index,
semantic: self.semantic.clone(),
time_budget: self.time_budget.clone(),
ranking_score_threshold: self.ranking_score_threshold,
};
let semantic = search.semantic.take();

View File

@ -50,6 +50,7 @@ pub struct Search<'a> {
index: &'a Index,
semantic: Option<SemanticSearch>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
}
impl<'a> Search<'a> {
@ -70,6 +71,7 @@ impl<'a> Search<'a> {
index,
semantic: None,
time_budget: TimeBudget::max(),
ranking_score_threshold: None,
}
}
@ -146,6 +148,11 @@ impl<'a> Search<'a> {
self
}
pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Search<'a> {
self.ranking_score_threshold = Some(ranking_score_threshold);
self
}
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search {
let ctx = SearchContext::new(self.index, self.rtxn)?;
@ -184,6 +191,7 @@ impl<'a> Search<'a> {
embedder_name,
embedder,
self.time_budget.clone(),
self.ranking_score_threshold,
)?
}
_ => execute_search(
@ -201,6 +209,7 @@ impl<'a> Search<'a> {
&mut DefaultSearchLogger,
&mut DefaultSearchLogger,
self.time_budget.clone(),
self.ranking_score_threshold,
)?,
};
@ -239,6 +248,7 @@ impl fmt::Debug for Search<'_> {
index: _,
semantic,
time_budget,
ranking_score_threshold,
} = self;
f.debug_struct("Search")
.field("query", query)
@ -257,6 +267,7 @@ impl fmt::Debug for Search<'_> {
&semantic.as_ref().map(|semantic| &semantic.embedder_name),
)
.field("time_budget", time_budget)
.field("ranking_score_threshold", ranking_score_threshold)
.finish()
}
}
@ -277,6 +288,8 @@ pub enum TermsMatchingStrategy {
Last,
// all words are mandatory
All,
// remove the most frequent words first
Frequency,
}
impl Default for TermsMatchingStrategy {

View File

@ -28,6 +28,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
scoring_strategy: ScoringStrategy,
logger: &mut dyn SearchLogger<Q>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<BucketSortOutput> {
logger.initial_query(query);
logger.ranking_rules(&ranking_rules);
@ -164,7 +165,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
loop {
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
ranking_rule_scores.push(ScoreDetails::Skipped);
// remove candidates from the universe without adding them to the results if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
maybe_add_to_results!(bucket);
ranking_rule_scores.pop();
if cur_ranking_rule_index == 0 {
@ -220,6 +233,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
debug_assert!(
ranking_rule_universes[cur_ranking_rule_index].is_superset(&next_bucket.candidates)
);
// remove candidates from the universe without adding them to the results if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -=
next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1

View File

@ -164,6 +164,21 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
}
costs
}
TermsMatchingStrategy::Frequency => {
let removal_order =
query_graph.removal_order_for_terms_matching_strategy_frequency(ctx)?;
let mut forbidden_nodes =
SmallBitmap::for_interned_values_in(&query_graph.nodes);
let mut costs = query_graph.nodes.map(|_| None);
// FIXME: this works because only the words ranking rule uses the terms matching strategy at the moment.
for ns in removal_order {
for n in ns.iter() {
*costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
}
forbidden_nodes.union(&ns);
}
costs
}
TermsMatchingStrategy::All => query_graph.nodes.map(|_| None),
}
} else {

View File

@ -523,6 +523,7 @@ mod tests {
&mut crate::DefaultSearchLogger,
&mut crate::DefaultSearchLogger,
TimeBudget::max(),
None,
)
.unwrap();

View File

@ -197,6 +197,11 @@ fn resolve_maximally_reduced_query_graph(
.iter()
.flat_map(|x| x.iter())
.collect(),
TermsMatchingStrategy::Frequency => query_graph
.removal_order_for_terms_matching_strategy_frequency(ctx)?
.iter()
.flat_map(|x| x.iter())
.collect(),
TermsMatchingStrategy::All => vec![],
};
graph.remove_nodes_keep_edges(&nodes_to_remove);
@ -543,6 +548,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search")]
pub fn filtered_universe(
index: &Index,
txn: &RoTxn<'_>,
@ -568,6 +574,7 @@ pub fn execute_vector_search(
embedder_name: &str,
embedder: &Embedder,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
@ -597,6 +604,7 @@ pub fn execute_vector_search(
scoring_strategy,
placeholder_search_logger,
time_budget,
ranking_score_threshold,
)?;
Ok(PartialSearchResult {
@ -626,6 +634,7 @@ pub fn execute_search(
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
@ -714,6 +723,7 @@ pub fn execute_search(
scoring_strategy,
query_graph_logger,
time_budget,
ranking_score_threshold,
)?
} else {
let ranking_rules =
@ -728,6 +738,7 @@ pub fn execute_search(
scoring_strategy,
placeholder_search_logger,
time_budget,
ranking_score_threshold,
)?
};

View File

@ -1,8 +1,9 @@
use std::cmp::Ordering;
use std::cmp::{Ordering, Reverse};
use std::collections::BTreeMap;
use std::hash::{Hash, Hasher};
use fxhash::{FxHashMap, FxHasher};
use roaring::RoaringBitmap;
use super::interner::{FixedSizeInterner, Interned};
use super::query_term::{
@ -11,6 +12,7 @@ use super::query_term::{
use super::small_bitmap::SmallBitmap;
use super::SearchContext;
use crate::search::new::interner::Interner;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::Result;
/// A node of the [`QueryGraph`].
@ -290,6 +292,49 @@ impl QueryGraph {
}
}
pub fn removal_order_for_terms_matching_strategy_frequency(
&self,
ctx: &mut SearchContext,
) -> Result<Vec<SmallBitmap<QueryNode>>> {
// lookup frequency for each term
let mut term_with_frequency: Vec<(u8, u64)> = {
let mut term_docids: BTreeMap<u8, RoaringBitmap> = Default::default();
for (_, node) in self.nodes.iter() {
match &node.data {
QueryNodeData::Term(t) => {
let docids = compute_query_term_subset_docids(ctx, &t.term_subset)?;
for id in t.term_ids.clone() {
term_docids
.entry(id)
.and_modify(|curr| *curr |= &docids)
.or_insert_with(|| docids.clone());
}
}
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue,
}
}
term_docids
.into_iter()
.map(|(idx, docids)| match docids.len() {
0 => (idx, u64::max_value()),
frequency => (idx, frequency),
})
.collect()
};
term_with_frequency.sort_by_key(|(_, frequency)| Reverse(*frequency));
let mut term_weight = BTreeMap::new();
let mut weight: u16 = 1;
let mut peekable = term_with_frequency.into_iter().peekable();
while let Some((idx, frequency)) = peekable.next() {
term_weight.insert(idx, weight);
if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
weight += 1;
}
}
let cost_of_term_idx = move |term_idx: u8| *term_weight.get(&term_idx).unwrap();
Ok(self.removal_order_for_terms_matching_strategy(ctx, cost_of_term_idx))
}
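
A standalone sketch of the weighting logic above, using plain term ids and frequencies instead of interned query nodes (a hypothetical helper, not milli's API): terms are sorted by descending document frequency, ties share a weight, and the lowest weight is removed first:

```rust
use std::cmp::Reverse;
use std::collections::BTreeMap;

/// `frequencies` maps a term index to the number of documents containing it.
/// Returns the removal weight of each term: weight 1 is dropped first.
fn removal_weights(frequencies: &[(u8, u64)]) -> BTreeMap<u8, u16> {
    let mut term_with_frequency = frequencies.to_vec();
    // Most frequent first: those are the cheapest to drop.
    term_with_frequency.sort_by_key(|(_, frequency)| Reverse(*frequency));

    let mut term_weight = BTreeMap::new();
    let mut weight: u16 = 1;
    let mut peekable = term_with_frequency.into_iter().peekable();
    while let Some((idx, frequency)) = peekable.next() {
        term_weight.insert(idx, weight);
        // Only bump the weight when the frequency changes, so ties share a weight.
        if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
            weight += 1;
        }
    }
    term_weight
}

#[test]
fn frequency_weights() {
    let weights = removal_weights(&[(0, 1_000_000), (1, 50_000), (2, 2_000)]);
    assert_eq!(weights[&0], 1); // the most frequent term is dropped first
    assert_eq!(weights[&2], 3); // the rarest term is kept the longest
}
```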
pub fn removal_order_for_terms_matching_strategy_last(
&self,
ctx: &SearchContext,
@ -315,10 +360,19 @@ impl QueryGraph {
if first_term_idx >= last_term_idx {
return vec![];
}
let cost_of_term_idx = |term_idx: u8| {
let rank = 1 + last_term_idx - term_idx;
rank as u16
};
self.removal_order_for_terms_matching_strategy(ctx, cost_of_term_idx)
}
pub fn removal_order_for_terms_matching_strategy(
&self,
ctx: &SearchContext,
order: impl Fn(u8) -> u16,
) -> Vec<SmallBitmap<QueryNode>> {
let mut nodes_to_remove = BTreeMap::<u16, SmallBitmap<QueryNode>>::new();
let mut at_least_one_mandatory_term = false;
for (node_id, node) in self.nodes.iter() {
@ -329,7 +383,7 @@ impl QueryGraph {
}
let mut cost = 0;
for id in t.term_ids.clone() {
cost = std::cmp::max(cost, cost_of_term_idx(id));
cost = std::cmp::max(cost, order(id));
}
nodes_to_remove
.entry(cost)

View File

@ -17,6 +17,7 @@ pub struct Similar<'a> {
index: &'a Index,
embedder_name: String,
embedder: Arc<Embedder>,
ranking_score_threshold: Option<f64>,
}
impl<'a> Similar<'a> {
@ -29,7 +30,17 @@ impl<'a> Similar<'a> {
embedder_name: String,
embedder: Arc<Embedder>,
) -> Self {
Self { id, filter: None, offset, limit, rtxn, index, embedder_name, embedder }
Self {
id,
filter: None,
offset,
limit,
rtxn,
index,
embedder_name,
embedder,
ranking_score_threshold: None,
}
}
pub fn filter(&mut self, filter: Filter<'a>) -> &mut Self {
@ -37,8 +48,18 @@ impl<'a> Similar<'a> {
self
}
pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Self {
self.ranking_score_threshold = Some(ranking_score_threshold);
self
}
pub fn execute(&self) -> Result<SearchResult> {
let universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
let mut universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
// we never want to return the requested document itself among the similar results
universe.remove(self.id);
let universe = universe;
let embedder_index =
self.index
@ -77,6 +98,8 @@ impl<'a> Similar<'a> {
let mut documents_seen = RoaringBitmap::new();
documents_seen.insert(self.id);
let mut candidates = universe;
for (docid, distance) in results
.into_iter()
// skip documents we've already seen & mark that we saw the current document
@ -85,8 +108,6 @@ impl<'a> Similar<'a> {
// take **after** filter and skip so that we get exactly limit elements if available
.take(self.limit)
{
documents_ids.push(docid);
let score = 1.0 - distance;
let score = self
.embedder
@ -94,14 +115,28 @@ impl<'a> Similar<'a> {
.map(|distribution| distribution.shift(score))
.unwrap_or(score);
let score = ScoreDetails::Vector(score_details::Vector { similarity: Some(score) });
let score_details =
vec![ScoreDetails::Vector(score_details::Vector { similarity: Some(score) })];
document_scores.push(vec![score]);
let score = ScoreDetails::global_score(score_details.iter());
if let Some(ranking_score_threshold) = &self.ranking_score_threshold {
if score < *ranking_score_threshold {
// this document is no longer a candidate
candidates.remove(docid);
// any document after this one is no longer a candidate either, so restrict the set to documents already seen.
candidates &= documents_seen;
break;
}
}
documents_ids.push(docid);
document_scores.push(score_details);
}
Ok(SearchResult {
matching_words: Default::default(),
candidates: universe,
candidates,
documents_ids,
document_scores,
degraded: false,

View File

@ -40,11 +40,26 @@ pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
operation: DelAddOperation,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
into_del_add_obkv_conditional_operation(reader, buffer, |_| operation)
}
/// Akin to the [into_del_add_obkv] function but lets you
/// conditionally define the `DelAdd` variant based on the obkv key.
pub fn into_del_add_obkv_conditional_operation<K, F>(
reader: obkv::KvReader<K>,
buffer: &mut Vec<u8>,
operation: F,
) -> std::io::Result<()>
where
K: obkv::Key + PartialOrd,
F: Fn(K) -> DelAddOperation,
{
let mut writer = obkv::KvWriter::new(buffer);
let mut value_buffer = Vec::new();
for (key, value) in reader.iter() {
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let operation = operation(key);
if matches!(operation, DelAddOperation::Deletion | DelAddOperation::DeletionAndAddition) {
value_writer.insert(DelAdd::Deletion, value)?;
}

View File

@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryInto;
use std::fs::File;
use std::io::{self, BufReader};
@ -9,7 +9,7 @@ use std::result::Result as StdResult;
use bytemuck::bytes_of;
use grenad::Sorter;
use heed::BytesEncode;
use itertools::EitherOrBoth;
use itertools::{merge_join_by, EitherOrBoth};
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use serde_json::{from_slice, Value};
@ -18,7 +18,7 @@ use FilterableValues::{Empty, Null, Values};
use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters};
use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
@ -45,7 +45,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
geo_fields_ids: Option<(FieldId, FieldId)>,
) -> Result<ExtractedFacetValues> {
let max_memory = indexer.max_memory_by_thread();
@ -76,143 +75,181 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut numbers_key_buffer = Vec::new();
let mut strings_key_buffer = Vec::new();
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
let old_faceted_fids: BTreeSet<_> =
settings_diff.old.faceted_fields_ids.iter().copied().collect();
let new_faceted_fids: BTreeSet<_> =
settings_diff.new.faceted_fields_ids.iter().copied().collect();
for (field_id, field_bytes) in obkv.iter() {
let delete_faceted = settings_diff.old.faceted_fields_ids.contains(&field_id);
let add_faceted = settings_diff.new.faceted_fields_ids.contains(&field_id);
if delete_faceted || add_faceted {
numbers_key_buffer.clear();
strings_key_buffer.clear();
if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids {
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
let get_document_json_value = move |field_id, side| {
obkv.get(field_id)
.map(KvReaderDelAdd::new)
.and_then(|kv| kv.get(side))
.map(from_slice)
.transpose()
.map_err(InternalError::SerdeJson)
};
// iterate over the faceted fields instead of over the whole document.
for eob in
merge_join_by(old_faceted_fids.iter(), new_faceted_fids.iter(), |old, new| {
old.cmp(new)
})
{
let (field_id, del_value, add_value) = match eob {
EitherOrBoth::Left(&field_id) => {
let del_value = get_document_json_value(field_id, DelAdd::Deletion)?;
// Set key to the field_id
// Note: this encoding is consistent with FieldIdCodec
numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes());
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
// deletion only
(field_id, del_value, None)
}
EitherOrBoth::Right(&field_id) => {
let add_value = get_document_json_value(field_id, DelAdd::Addition)?;
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = DocumentId::from_be_bytes(document);
// addition only
(field_id, None, add_value)
}
EitherOrBoth::Both(&field_id, _) => {
// during a settings-only update, fields that stay faceted don't need to be recomputed.
if settings_diff.settings_update_only {
continue;
}
// For the other extraction tasks, prefix the key with the field_id and the document_id
numbers_key_buffer.extend_from_slice(docid_bytes);
strings_key_buffer.extend_from_slice(docid_bytes);
let del_value = get_document_json_value(field_id, DelAdd::Deletion)?;
let add_value = get_document_json_value(field_id, DelAdd::Addition)?;
let del_add_obkv = obkv::KvReader::new(field_bytes);
let del_value = match del_add_obkv.get(DelAdd::Deletion).filter(|_| delete_faceted)
{
Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
None => None,
};
let add_value = match del_add_obkv.get(DelAdd::Addition).filter(|_| add_faceted) {
Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
None => None,
(field_id, del_value, add_value)
}
};
// We insert the document id on the Del and the Add side if the field exists.
let (ref mut del_exists, ref mut add_exists) =
facet_exists_docids.entry(field_id).or_default();
let (ref mut del_is_null, ref mut add_is_null) =
facet_is_null_docids.entry(field_id).or_default();
let (ref mut del_is_empty, ref mut add_is_empty) =
facet_is_empty_docids.entry(field_id).or_default();
if del_value.is_some() || add_value.is_some() {
numbers_key_buffer.clear();
strings_key_buffer.clear();
if del_value.is_some() {
del_exists.insert(document);
}
if add_value.is_some() {
add_exists.insert(document);
}
// Set key to the field_id
// Note: this encoding is consistent with FieldIdCodec
numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes());
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
let geo_support =
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let del_filterable_values =
del_value.map(|value| extract_facet_values(&value, geo_support));
let add_filterable_values =
add_value.map(|value| extract_facet_values(&value, geo_support));
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = DocumentId::from_be_bytes(document);
// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers| {
insert_numbers_diff(
&mut fid_docid_facet_numbers_sorter,
&mut numbers_key_buffer,
del_numbers,
add_numbers,
)
};
let mut insert_strings_diff = |del_strings, add_strings| {
insert_strings_diff(
&mut fid_docid_facet_strings_sorter,
&mut strings_key_buffer,
del_strings,
add_strings,
)
};
// For the other extraction tasks, prefix the key with the field_id and the document_id
numbers_key_buffer.extend_from_slice(docid_bytes);
strings_key_buffer.extend_from_slice(docid_bytes);
match (del_filterable_values, add_filterable_values) {
(None, None) => (),
(Some(del_filterable_values), None) => match del_filterable_values {
Null => {
del_is_null.insert(document);
}
Empty => {
del_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
},
(None, Some(add_filterable_values)) => match add_filterable_values {
Null => {
add_is_null.insert(document);
}
Empty => {
add_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
}
},
(Some(del_filterable_values), Some(add_filterable_values)) => {
match (del_filterable_values, add_filterable_values) {
(Null, Null) | (Empty, Empty) => (),
(Null, Empty) => {
del_is_null.insert(document);
add_is_empty.insert(document);
}
(Empty, Null) => {
del_is_empty.insert(document);
add_is_null.insert(document);
}
(Null, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
// We insert the document id on the Del and the Add side if the field exists.
let (ref mut del_exists, ref mut add_exists) =
facet_exists_docids.entry(field_id).or_default();
let (ref mut del_is_null, ref mut add_is_null) =
facet_is_null_docids.entry(field_id).or_default();
let (ref mut del_is_empty, ref mut add_is_empty) =
facet_is_empty_docids.entry(field_id).or_default();
if del_value.is_some() {
del_exists.insert(document);
}
if add_value.is_some() {
add_exists.insert(document);
}
let del_geo_support = settings_diff
.old
.geo_fields_ids
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let add_geo_support = settings_diff
.new
.geo_fields_ids
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let del_filterable_values =
del_value.map(|value| extract_facet_values(&value, del_geo_support));
let add_filterable_values =
add_value.map(|value| extract_facet_values(&value, add_geo_support));
// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers| {
insert_numbers_diff(
&mut fid_docid_facet_numbers_sorter,
&mut numbers_key_buffer,
del_numbers,
add_numbers,
)
};
let mut insert_strings_diff = |del_strings, add_strings| {
insert_strings_diff(
&mut fid_docid_facet_strings_sorter,
&mut strings_key_buffer,
del_strings,
add_strings,
)
};
match (del_filterable_values, add_filterable_values) {
(None, None) => (),
(Some(del_filterable_values), None) => match del_filterable_values {
Null => {
del_is_null.insert(document);
}
Empty => {
del_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
},
(None, Some(add_filterable_values)) => match add_filterable_values {
Null => {
add_is_null.insert(document);
}
Empty => {
add_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
}
},
(Some(del_filterable_values), Some(add_filterable_values)) => {
match (del_filterable_values, add_filterable_values) {
(Null, Null) | (Empty, Empty) => (),
(Null, Empty) => {
del_is_null.insert(document);
add_is_empty.insert(document);
}
(Empty, Null) => {
del_is_empty.insert(document);
add_is_null.insert(document);
}
(Null, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_null.insert(document);
}
(Empty, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_empty.insert(document);
}
(Values { numbers, strings }, Null) => {
add_is_null.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(Values { numbers, strings }, Empty) => {
add_is_empty.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(
Values { numbers: del_numbers, strings: del_strings },
Values { numbers: add_numbers, strings: add_strings },
) => {
insert_numbers_diff(del_numbers, add_numbers)?;
insert_strings_diff(del_strings, add_strings)?;
}
}
}
}

View File

@ -8,6 +8,7 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::GeoError;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::extract_finite_float_from_value;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::{FieldId, InternalError, Result};
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
@ -18,7 +19,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
primary_key_id: FieldId,
(lat_fid, lng_fid): (FieldId, FieldId),
settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
let mut writer = create_writer(
indexer.chunk_compression_type,
@ -38,47 +39,27 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
serde_json::from_slice(document_id).unwrap()
};
// first we get the two fields
match (obkv.get(lat_fid), obkv.get(lng_fid)) {
(Some(lat), Some(lng)) => {
let deladd_lat_obkv = KvReaderDelAdd::new(lat);
let deladd_lng_obkv = KvReaderDelAdd::new(lng);
// extract old version
let del_lat_lng =
extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
// extract new version
let add_lat_lng =
extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
// then we extract the values
let del_lat_lng = deladd_lat_obkv
.get(DelAdd::Deletion)
.zip(deladd_lng_obkv.get(DelAdd::Deletion))
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
.transpose()?;
let add_lat_lng = deladd_lat_obkv
.get(DelAdd::Addition)
.zip(deladd_lng_obkv.get(DelAdd::Addition))
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
.transpose()?;
if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
if let Some([lat, lng]) = del_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Deletion, bytes)?;
}
if let Some([lat, lng]) = add_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Addition, bytes)?;
}
let bytes = obkv.into_inner()?;
writer.insert(docid_bytes, bytes)?;
}
if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
if let Some([lat, lng]) = del_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Deletion, bytes)?;
}
if let Some([lat, lng]) = add_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Addition, bytes)?;
}
let bytes = obkv.into_inner()?;
writer.insert(docid_bytes, bytes)?;
}
(None, Some(_)) => {
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
}
(Some(_), None) => {
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
}
(None, None) => (),
}
}
@ -86,16 +67,37 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
}
/// Extract the finite floats lat and lng from two bytes slices.
fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
fn extract_lat_lng(
document: &obkv::KvReader<FieldId>,
settings: &InnerIndexSettings,
deladd: DelAdd,
document_id: impl Fn() -> Value,
) -> Result<Option<[f64; 2]>> {
match settings.geo_fields_ids {
Some((lat_fid, lng_fid)) => {
let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let (lat, lng) = match (lat, lng) {
(Some(lat), Some(lng)) => (lat, lng),
(Some(_), None) => {
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
}
(None, Some(_)) => {
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
}
(None, None) => return Ok(None),
};
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
let lng = extract_finite_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
Ok([lat, lng])
let lng = extract_finite_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
Ok(Some([lat, lng]))
}
None => Ok(None),
}
}

View File

@ -26,11 +26,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
// early return if the data should neither be deleted nor created.
if !any_deletion && !any_addition {
if settings_diff.settings_update_only && !settings_diff.reindex_proximities() {
let writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
@ -39,8 +36,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
return writer_into_reader(writer);
}
let max_memory = indexer.max_memory_by_thread();
let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
let max_memory = indexer.max_memory_by_thread();
let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
.map(|_| {
create_sorter(

View File

@ -8,6 +8,7 @@ mod extract_vector_points;
mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod extract_word_position_docids;
// mod searchable;
use std::fs::File;
use std::io::BufReader;
@ -43,7 +44,6 @@ pub(crate) fn data_from_obkv_documents(
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
primary_key_id: FieldId,
geo_fields_ids: Option<(FieldId, FieldId)>,
settings_diff: Arc<InnerIndexSettingsDiff>,
max_positions_per_attributes: Option<u32>,
) -> Result<()> {
@ -70,7 +70,6 @@ pub(crate) fn data_from_obkv_documents(
indexer,
lmdb_writer_sx.clone(),
primary_key_id,
geo_fields_ids,
settings_diff.clone(),
max_positions_per_attributes,
)
@ -293,7 +292,6 @@ fn send_and_extract_flattened_documents_data(
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
primary_key_id: FieldId,
geo_fields_ids: Option<(FieldId, FieldId)>,
settings_diff: Arc<InnerIndexSettingsDiff>,
max_positions_per_attributes: Option<u32>,
) -> Result<(
@ -303,12 +301,13 @@ fn send_and_extract_flattened_documents_data(
let flattened_documents_chunk =
flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
if let Some(geo_fields_ids) = geo_fields_ids {
if settings_diff.run_geo_indexing() {
let documents_chunk_cloned = flattened_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
let settings_diff = settings_diff.clone();
rayon::spawn(move || {
let result =
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_fields_ids);
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
let _ = match result {
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
@ -347,7 +346,6 @@ fn send_and_extract_flattened_documents_data(
flattened_documents_chunk.clone(),
indexer,
&settings_diff,
geo_fields_ids,
)?;
// send fid_docid_facet_numbers_chunk to DB writer

View File

@ -0,0 +1,211 @@
use std::collections::HashMap;
use charabia::normalizer::NormalizedTokenIter;
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
use roaring::RoaringBitmap;
use serde_json::Value;
use crate::update::settings::InnerIndexSettings;
use crate::{InternalError, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>;
pub struct FieldWordPositionExtractorBuilder<'a> {
max_positions_per_attributes: u16,
stop_words: Option<&'a fst::Set<Vec<u8>>>,
separators: Option<Vec<&'a str>>,
dictionary: Option<Vec<&'a str>>,
}
impl<'a> FieldWordPositionExtractorBuilder<'a> {
pub fn new(
max_positions_per_attributes: Option<u32>,
settings: &'a InnerIndexSettings,
) -> Result<Self> {
let stop_words = settings.stop_words.as_ref();
let separators: Option<Vec<_>> =
settings.allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary: Option<Vec<_>> =
settings.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
Ok(Self {
max_positions_per_attributes: max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE as u16, |max| {
max.min(MAX_POSITION_PER_ATTRIBUTE) as u16
}),
stop_words,
separators,
dictionary,
})
}
pub fn build(&'a self) -> FieldWordPositionExtractor<'a> {
let builder = tokenizer_builder(
self.stop_words,
self.separators.as_deref(),
self.dictionary.as_deref(),
None,
);
FieldWordPositionExtractor {
tokenizer: builder.into_tokenizer(),
max_positions_per_attributes: self.max_positions_per_attributes,
}
}
}
pub struct FieldWordPositionExtractor<'a> {
tokenizer: Tokenizer<'a>,
max_positions_per_attributes: u16,
}
impl<'a> FieldWordPositionExtractor<'a> {
pub fn extract<'b>(
&'a self,
field_bytes: &[u8],
buffer: &'b mut String,
) -> Result<ExtractedFieldWordPosition<'a, 'b>> {
let field_value = serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
Ok(ExtractedFieldWordPosition {
tokenizer: &self.tokenizer,
max_positions_per_attributes: self.max_positions_per_attributes,
field_value,
buffer,
})
}
}
pub struct ExtractedFieldWordPosition<'a, 'b> {
tokenizer: &'a Tokenizer<'a>,
max_positions_per_attributes: u16,
field_value: Value,
buffer: &'b mut String,
}
impl<'a> ExtractedFieldWordPosition<'a, '_> {
pub fn iter<'o>(&'o mut self) -> FieldWordPositionIter<'o> {
self.buffer.clear();
let inner = match json_to_string(&self.field_value, &mut self.buffer) {
Some(field) => Some(self.tokenizer.tokenize(field)),
None => None,
};
// create an iterator of tokens with their positions.
FieldWordPositionIter {
inner,
max_positions_per_attributes: self.max_positions_per_attributes,
position: 0,
prev_kind: None,
}
}
}
pub struct FieldWordPositionIter<'a> {
inner: Option<NormalizedTokenIter<'a, 'a>>,
max_positions_per_attributes: u16,
position: u16,
prev_kind: Option<TokenKind>,
}
impl<'a> Iterator for FieldWordPositionIter<'a> {
type Item = (u16, Token<'a>);
fn next(&mut self) -> Option<Self::Item> {
if self.position >= self.max_positions_per_attributes {
return None;
}
let token = self.inner.as_mut().map(|i| i.next()).flatten()?;
match token.kind {
TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
self.position += match self.prev_kind {
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
Some(_) => 1,
None => 0,
};
self.prev_kind = Some(token.kind)
}
TokenKind::Separator(_) if self.position == 0 => {
return self.next();
}
TokenKind::Separator(SeparatorKind::Hard) => {
self.prev_kind = Some(token.kind);
}
TokenKind::Separator(SeparatorKind::Soft)
if self.prev_kind != Some(TokenKind::Separator(SeparatorKind::Hard)) =>
{
self.prev_kind = Some(token.kind);
}
_ => return self.next(),
}
if !token.is_word() {
return self.next();
}
// keep a word only if it is not empty and fits in an LMDB key.
let lemma = token.lemma().trim();
if !lemma.is_empty() && lemma.len() <= MAX_WORD_LENGTH {
Some((self.position, token))
} else {
self.next()
}
}
}
/// Factorize tokenizer building.
pub fn tokenizer_builder<'a>(
stop_words: Option<&'a fst::Set<Vec<u8>>>,
allowed_separators: Option<&'a [&str]>,
dictionary: Option<&'a [&str]>,
script_language: Option<&'a HashMap<Script, Vec<Language>>>,
) -> TokenizerBuilder<'a, Vec<u8>> {
let mut tokenizer_builder = TokenizerBuilder::new();
if let Some(stop_words) = stop_words {
tokenizer_builder.stop_words(stop_words);
}
if let Some(dictionary) = dictionary {
tokenizer_builder.words_dict(dictionary);
}
if let Some(separators) = allowed_separators {
tokenizer_builder.separators(separators);
}
if let Some(script_language) = script_language {
tokenizer_builder.allow_list(script_language);
}
tokenizer_builder
}
/// Transform a JSON value into a string that can be indexed.
fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a str> {
fn inner(value: &Value, output: &mut String) -> bool {
use std::fmt::Write;
match value {
Value::Null | Value::Object(_) => false,
Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(),
Value::Number(number) => write!(output, "{}", number).is_ok(),
Value::String(string) => write!(output, "{}", string).is_ok(),
Value::Array(array) => {
let mut count = 0;
for value in array {
if inner(value, output) {
output.push_str(". ");
count += 1;
}
}
// check that at least one value was written
count != 0
}
}
}
if let Value::String(string) = value {
Some(string)
} else if inner(value, buffer) {
Some(buffer)
} else {
None
}
}
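
A minimal sketch of how `json_to_string` flattens values (the `json!` inputs are illustrative; since the helper is private, a snippet like this would live in the same module, e.g. as a unit test):

```rust
use serde_json::json;

fn demo_json_to_string() {
    let mut buffer = String::new();

    // A plain string is returned as-is, without touching the buffer.
    assert_eq!(json_to_string(&json!("Hello"), &mut buffer), Some("Hello"));

    // Scalars and arrays are written into the buffer, array elements
    // separated by ". " so that words from different elements stay apart.
    buffer.clear();
    let value = json!(["Hello", 42, true]);
    assert_eq!(json_to_string(&value, &mut buffer), Some("Hello. 42. true. "));

    // Nulls and objects are not indexable and yield `None`.
    buffer.clear();
    assert_eq!(json_to_string(&json!({ "nested": 1 }), &mut buffer), None);
}
```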

View File

@ -0,0 +1,114 @@
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryInto;
use std::fs::File;
use std::io;
use std::io::BufReader;
use field_word_position::FieldWordPositionExtractorBuilder;
use obkv::KvReader;
use roaring::RoaringBitmap;
use word_docids::{WordDocidsDump, WordDocidsExtractor};
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::extract::extract_docid_word_positions::ScriptLanguageDocidsMap;
use crate::update::index_documents::GrenadParameters;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{FieldId, Result, SerializationError};
mod field_word_position;
mod word_docids;
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_searchable_data<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
max_positions_per_attributes: Option<u32>,
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
let searchable_fields_to_index = settings_diff.searchable_fields_to_index();
let mut documents_ids = RoaringBitmap::new();
let add_builder =
FieldWordPositionExtractorBuilder::new(max_positions_per_attributes, &settings_diff.new)?;
let add_token_positions_extractor = add_builder.build();
let del_builder;
let del_token_positions_extractor = if settings_diff.settings_update_only {
del_builder = FieldWordPositionExtractorBuilder::new(
max_positions_per_attributes,
&settings_diff.old,
)?;
del_builder.build()
} else {
add_builder.build()
};
let token_positions_extractor = &[del_token_positions_extractor, add_token_positions_extractor];
let mut word_map = BTreeMap::new();
let mut word_docids_extractor = WordDocidsExtractor::new(settings_diff);
let mut cursor = obkv_documents.into_cursor()?;
// loop over documents
while let Some((key, value)) = cursor.move_on_next()? {
let document_id = key
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value);
// if the searchable fields didn't change, skip the searchable indexing for this document.
if !settings_diff.reindex_searchable()
&& !searchable_fields_changed(&obkv, &searchable_fields_to_index)
{
continue;
}
documents_ids.push(document_id);
let mut buffer = String::new();
for field_id in searchable_fields_to_index.iter() {
let Some(field_obkv) = obkv.get(*field_id).map(KvReaderDelAdd::new) else { continue };
for (deladd, field_bytes) in field_obkv {
let mut extracted_positions =
token_positions_extractor[deladd as usize].extract(field_bytes, &mut buffer)?;
for (position, token) in extracted_positions.iter() {
let word = token.lemma().trim();
if !word_map.contains_key(word) {
word_map.insert(word.to_string(), word_map.len() as u32);
}
let word_id = word_map.get(word).unwrap();
word_docids_extractor.insert(*word_id, *field_id, document_id, deladd);
}
}
}
if word_docids_extractor.rough_size_estimate()
> indexer.max_memory.map_or(512 * 1024 * 1024, |s| s.min(512 * 1024 * 1024))
{
let WordDocidsDump { .. } =
word_docids_extractor.dump(&word_map, &searchable_fields_to_index, indexer)?;
}
}
todo!()
}
/// Check if any searchable fields of a document changed.
fn searchable_fields_changed(
obkv: &KvReader<FieldId>,
searchable_fields: &BTreeSet<FieldId>,
) -> bool {
for field_id in searchable_fields {
let Some(field_obkv) = obkv.get(*field_id).map(KvReaderDelAdd::new) else { continue };
match (field_obkv.get(DelAdd::Deletion), field_obkv.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
(None, None) => (),
// if both contain a value and the values are the same, check the next field.
(Some(del), Some(add)) if del == add => (),
// otherwise the fields are different, return true.
_otherwise => return true,
}
}
false
}

View File

@ -0,0 +1,203 @@
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::hash::Hash;
use std::io::BufReader;
use std::mem::size_of;
use roaring::RoaringBitmap;
use crate::update::del_add::KvWriterDelAdd;
use crate::update::index_documents::extract::searchable::DelAdd;
use crate::update::index_documents::{create_writer, writer_into_reader, GrenadParameters};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result};
pub struct WordDocidsExtractor<'a> {
word_fid_docids: RevertedIndex<(u32, FieldId)>,
settings_diff: &'a InnerIndexSettingsDiff,
}
impl<'a> WordDocidsExtractor<'a> {
pub fn new(settings_diff: &'a InnerIndexSettingsDiff) -> Self {
Self { word_fid_docids: RevertedIndex::new(), settings_diff }
}
pub fn insert(&mut self, wordid: u32, fieldid: FieldId, docid: DocumentId, del_add: DelAdd) {
self.word_fid_docids.insert((wordid, fieldid), docid, del_add);
}
pub fn rough_size_estimate(&self) -> usize {
self.word_fid_docids.rough_size_estimate()
}
pub fn dump(
&mut self,
word_map: &BTreeMap<String, u32>,
fields: &BTreeSet<FieldId>,
indexer: GrenadParameters,
) -> Result<WordDocidsDump> {
let mut word_fid_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut word_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut exact_word_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut exact_word_deletion = RoaringBitmap::new();
let mut exact_word_addition = RoaringBitmap::new();
let mut word_deletion = RoaringBitmap::new();
let mut word_addition = RoaringBitmap::new();
let mut key_buffer = Vec::new();
let mut bitmap_buffer = Vec::new();
let mut obkv_buffer = Vec::new();
for (word, wid) in word_map {
exact_word_deletion.clear();
exact_word_addition.clear();
word_deletion.clear();
word_addition.clear();
for fid in fields {
if let Some((deletion, addition)) = self.word_fid_docids.inner.get(&(*wid, *fid)) {
if self.settings_diff.old.exact_attributes.contains(&fid) {
exact_word_deletion |= deletion;
} else {
word_deletion |= deletion;
}
if self.settings_diff.new.exact_attributes.contains(&fid) {
exact_word_addition |= addition;
} else {
word_addition |= addition;
}
if deletion != addition {
key_buffer.clear();
key_buffer.extend_from_slice(word.as_bytes());
key_buffer.push(0);
key_buffer.extend_from_slice(&fid.to_be_bytes());
let value = bitmaps_into_deladd_obkv(
deletion,
addition,
&mut obkv_buffer,
&mut bitmap_buffer,
)?;
word_fid_docids_writer.insert(&key_buffer, value)?;
}
}
}
key_buffer.clear();
key_buffer.extend_from_slice(word.as_bytes());
if exact_word_deletion != exact_word_addition {
let value = bitmaps_into_deladd_obkv(
&exact_word_deletion,
&exact_word_addition,
&mut obkv_buffer,
&mut bitmap_buffer,
)?;
exact_word_docids_writer.insert(&key_buffer, value)?;
}
if word_deletion != word_addition {
let value = bitmaps_into_deladd_obkv(
&word_deletion,
&word_addition,
&mut obkv_buffer,
&mut bitmap_buffer,
)?;
word_docids_writer.insert(&key_buffer, value)?;
}
}
self.word_fid_docids.clear();
Ok(WordDocidsDump {
word_fid_docids: writer_into_reader(word_fid_docids_writer)?,
word_docids: writer_into_reader(word_docids_writer)?,
exact_word_docids: writer_into_reader(exact_word_docids_writer)?,
})
}
}
fn bitmaps_into_deladd_obkv<'a>(
deletion: &RoaringBitmap,
addition: &RoaringBitmap,
obkv_buffer: &'a mut Vec<u8>,
bitmap_buffer: &mut Vec<u8>,
) -> Result<&'a mut Vec<u8>> {
obkv_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(obkv_buffer);
if !deletion.is_empty() {
bitmap_buffer.clear();
CboRoaringBitmapCodec::serialize_into(deletion, bitmap_buffer);
value_writer.insert(DelAdd::Deletion, &*bitmap_buffer)?;
}
if !addition.is_empty() {
bitmap_buffer.clear();
CboRoaringBitmapCodec::serialize_into(addition, bitmap_buffer);
value_writer.insert(DelAdd::Addition, &*bitmap_buffer)?;
}
Ok(value_writer.into_inner()?)
}
#[derive(Debug)]
struct RevertedIndex<K> {
inner: HashMap<K, (RoaringBitmap, RoaringBitmap)>,
max_value_size: usize,
}
impl<K: PartialEq + Eq + Hash> RevertedIndex<K> {
pub fn insert(&mut self, key: K, docid: DocumentId, del_add: DelAdd) {
let size = match self.inner.entry(key) {
Occupied(mut entry) => {
let (ref mut del, ref mut add) = entry.get_mut();
match del_add {
DelAdd::Deletion => del.insert(docid),
DelAdd::Addition => add.insert(docid),
};
del.serialized_size() + add.serialized_size()
}
Vacant(entry) => {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(docid);
let size = bitmap.serialized_size();
match del_add {
DelAdd::Deletion => entry.insert((bitmap, RoaringBitmap::new())),
DelAdd::Addition => entry.insert((RoaringBitmap::new(), bitmap)),
};
size * 2
}
};
self.max_value_size = self.max_value_size.max(size);
}
pub fn new() -> Self {
Self { inner: HashMap::new(), max_value_size: 0 }
}
pub fn rough_size_estimate(&self) -> usize {
self.inner.len() * size_of::<K>() + self.inner.len() * self.max_value_size
}
fn clear(&mut self) {
self.max_value_size = 0;
self.inner.clear();
}
}
pub struct WordDocidsDump {
pub word_fid_docids: grenad::Reader<BufReader<File>>,
pub word_docids: grenad::Reader<BufReader<File>>,
pub exact_word_docids: grenad::Reader<BufReader<File>>,
}

View File

@ -315,28 +315,6 @@ where
// get the primary key field id
let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap();
// get the fid of the `_geo.lat` and `_geo.lng` fields.
let mut field_id_map = self.index.fields_ids_map(self.wtxn)?;
// self.index.fields_ids_map($a)? ==>> field_id_map
let geo_fields_ids = match field_id_map.id("_geo") {
Some(gfid) => {
let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
// if `_geo` is faceted then we get the `lat` and `lng`
if is_sortable || is_filterable {
let field_ids = field_id_map
.insert("_geo.lat")
.zip(field_id_map.insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
Some(field_ids)
} else {
None
}
}
None => None,
};
let pool_params = GrenadParameters {
chunk_compression_type: self.indexer_config.chunk_compression_type,
chunk_compression_level: self.indexer_config.chunk_compression_level,
@ -391,6 +369,7 @@ where
// Run extraction pipeline in parallel.
pool.install(|| {
let settings_diff_cloned = settings_diff.clone();
rayon::spawn(move || {
let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
let _enter = child_span.enter();
@ -420,8 +399,7 @@ where
pool_params,
lmdb_writer_sx.clone(),
primary_key_id,
geo_fields_ids,
settings_diff.clone(),
settings_diff_cloned,
max_positions_per_attributes,
)
});
@ -448,7 +426,7 @@ where
Err(status) => {
if let Some(typed_chunks) = chunk_accumulator.pop_longest() {
let (docids, is_merged_database) =
write_typed_chunk_into_index(typed_chunks, self.index, self.wtxn)?;
write_typed_chunk_into_index(self.wtxn, self.index, &settings_diff, typed_chunks)?;
if !docids.is_empty() {
final_documents_ids |= docids;
let documents_seen_count = final_documents_ids.len();

View File

@ -20,7 +20,10 @@ use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::index::{db_name, main_key};
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
use crate::update::del_add::{
into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation,
KvReaderDelAdd,
};
use crate::update::index_documents::GrenadParameters;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
@ -805,13 +808,15 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut new_inner_settings = old_inner_settings.clone();
new_inner_settings.fields_ids_map = fields_ids_map;
let settings_diff = InnerIndexSettingsDiff {
old: old_inner_settings,
new: new_inner_settings,
primary_key_id,
embedding_configs_updated: false,
settings_update_only: false,
};
let embedding_configs_updated = false;
let settings_update_only = false;
let settings_diff = InnerIndexSettingsDiff::new(
old_inner_settings,
new_inner_settings,
primary_key_id,
embedding_configs_updated,
settings_update_only,
);
Ok(TransformOutput {
primary_key,
@ -840,14 +845,6 @@ impl<'a, 'i> Transform<'a, 'i> {
// Always keep the primary key.
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
// If only the `searchableAttributes` has been changed, keep only the searchable fields.
let must_reindex_searchables = settings_diff.reindex_searchable();
let necessary_searchable_field = |id: FieldId| -> bool {
must_reindex_searchables
&& (settings_diff.old.searchable_fields_ids.contains(&id)
|| settings_diff.new.searchable_fields_ids.contains(&id))
};
// If only a faceted field has been added, keep only this field.
let must_reindex_facets = settings_diff.reindex_facets();
let necessary_faceted_field = |id: FieldId| -> bool {
@ -862,13 +859,16 @@ impl<'a, 'i> Transform<'a, 'i> {
// we need the fields for the prompt/templating.
let reindex_vectors = settings_diff.reindex_vectors();
// The operations that we must perform on the different fields.
let mut operations = HashMap::new();
let mut obkv_writer = KvWriter::<_, FieldId>::memory();
for (id, val) in old_obkv.iter() {
if is_primary_key(id)
|| necessary_searchable_field(id)
|| necessary_faceted_field(id)
|| reindex_vectors
{
if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors {
operations.insert(id, DelAddOperation::DeletionAndAddition);
obkv_writer.insert(id, val)?;
} else if let Some(operation) = settings_diff.reindex_searchable_id(id) {
operations.insert(id, operation);
obkv_writer.insert(id, val)?;
}
}
@ -887,11 +887,9 @@ impl<'a, 'i> Transform<'a, 'i> {
let flattened = flattened.as_deref().map_or(obkv, KvReader::new);
flattened_obkv_buffer.clear();
into_del_add_obkv(
flattened,
DelAddOperation::DeletionAndAddition,
flattened_obkv_buffer,
)?;
into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| {
operations.get(&id).copied().unwrap_or(DelAddOperation::DeletionAndAddition)
})?;
}
Ok(())
@ -901,6 +899,11 @@ impl<'a, 'i> Transform<'a, 'i> {
/// of the index with the attributes reordered accordingly to the `FieldsIdsMap` given as argument.
///
// TODO this can be done in parallel by using the rayon `ThreadPool`.
#[tracing::instrument(
level = "trace"
skip(self, wtxn, settings_diff),
target = "indexing::documents"
)]
pub fn prepare_for_documents_reindexing(
self,
wtxn: &mut heed::RwTxn<'i>,

View File

@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec;
use charabia::{Language, Script};
use grenad::{Merger, MergerBuilder};
use heed::types::Bytes;
use heed::RwTxn;
use heed::{BytesDecode, RwTxn};
use obkv::{KvReader, KvWriter};
use roaring::RoaringBitmap;
@ -20,13 +20,16 @@ use super::MergeFn;
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
use crate::facet::FacetType;
use crate::index::db_name::DOCUMENTS;
use crate::proximity::MAX_DISTANCE;
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::{
as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{
lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, InternalError, Result, SerializationError,
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec,
};
/// This struct accumulates and group the TypedChunks
@ -122,9 +125,10 @@ impl TypedChunk {
/// Return new documents seen.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
pub(crate) fn write_typed_chunk_into_index(
typed_chunks: Vec<TypedChunk>,
index: &Index,
wtxn: &mut RwTxn,
index: &Index,
settings_diff: &InnerIndexSettingsDiff,
typed_chunks: Vec<TypedChunk>,
) -> Result<(RoaringBitmap, bool)> {
let mut is_merged_database = false;
match typed_chunks[0] {
@ -485,13 +489,22 @@ pub(crate) fn write_typed_chunk_into_index(
}
let merger = builder.build();
write_entries_into_database(
merger,
&index.word_pair_proximity_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
if settings_diff.only_additional_fields.is_some() {
write_proximity_entries_into_database_additional_searchables(
merger,
&index.word_pair_proximity_docids,
wtxn,
)?;
} else {
write_entries_into_database(
merger,
&index.word_pair_proximity_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
}
is_merged_database = true;
}
TypedChunk::FieldIdDocidFacetNumbers(_) => {
@ -830,3 +843,51 @@ where
}
Ok(())
}
/// Akin to the `write_entries_into_database` function but specialized
/// for the case where we only index additional searchable fields.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn write_proximity_entries_into_database_additional_searchables<R>(
merger: Merger<R, MergeFn>,
database: &heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
wtxn: &mut RwTxn,
) -> Result<()>
where
R: io::Read + io::Seek,
{
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
if valid_lmdb_key(key) {
let (proximity_to_insert, word1, word2) =
U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
let data_to_insert = match KvReaderDelAdd::new(value).get(DelAdd::Addition) {
Some(value) => {
CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
}
None => continue,
};
let mut data_to_remove = RoaringBitmap::new();
for prox in 1..(MAX_DISTANCE as u8) {
let key = (prox, word1, word2);
let database_value = database.get(wtxn, &key)?.unwrap_or_default();
let value = if prox == proximity_to_insert {
// Proximity that should be changed.
// Union values and remove lower proximity data
(&database_value | &data_to_insert) - &data_to_remove
} else {
// Remove lower proximity data
&database_value - &data_to_remove
};
// add the current data to data_to_remove for the next proximities
data_to_remove |= &value;
if database_value != value {
database.put(wtxn, &key, &value)?;
}
}
}
}
Ok(())
}
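
To see what the per-proximity loop above does, here is a minimal, self-contained sketch over plain `BTreeSet`s (hypothetical `merge_additional` helper; the real code works on `RoaringBitmap`s and LMDB): the incoming docids are unioned into their proximity level and removed from every higher level, so each word pair keeps only its best (lowest) proximity.

```rust
use std::collections::BTreeSet;

// levels[p - 1] holds the docids currently stored at proximity p.
fn merge_additional(
    levels: &mut [BTreeSet<u32>],
    proximity_to_insert: usize,
    data_to_insert: &BTreeSet<u32>,
) {
    let mut data_to_remove = BTreeSet::new();
    for prox in 1..=levels.len() {
        let merged: BTreeSet<u32> = if prox == proximity_to_insert {
            // Proximity that should be changed: union the new docids in.
            levels[prox - 1].union(data_to_insert).copied().collect()
        } else {
            levels[prox - 1].clone()
        };
        // Remove anything already kept at a lower (better) proximity.
        let value: BTreeSet<u32> = merged.difference(&data_to_remove).copied().collect();
        data_to_remove.extend(value.iter().copied());
        levels[prox - 1] = value;
    }
}

fn main() {
    // Docid 7 used to be stored at proximity 3; the newly indexed field
    // brings the two words to proximity 1, so 7 moves to the first level.
    let mut levels = vec![BTreeSet::new(), BTreeSet::new(), BTreeSet::from([7u32])];
    merge_additional(&mut levels, 1, &BTreeSet::from([7u32]));
    assert!(levels[0].contains(&7) && !levels[2].contains(&7));
}
```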

View File

@ -9,6 +9,7 @@ use itertools::{EitherOrBoth, Itertools};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use time::OffsetDateTime;
use super::del_add::DelAddOperation;
use super::index_documents::{IndexDocumentsConfig, Transform};
use super::IndexerConfig;
use crate::criterion::Criterion;
@ -1072,13 +1073,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
.index
.primary_key(self.wtxn)?
.and_then(|name| new_inner_settings.fields_ids_map.id(name));
let inner_settings_diff = InnerIndexSettingsDiff {
old: old_inner_settings,
new: new_inner_settings,
primary_key_id,
embedding_configs_updated,
settings_update_only: true,
};
let settings_update_only = true;
let inner_settings_diff = InnerIndexSettingsDiff::new(
old_inner_settings,
new_inner_settings,
primary_key_id,
embedding_configs_updated,
settings_update_only,
);
if inner_settings_diff.any_reindexing_needed() {
self.reindex(&progress_callback, &should_abort, inner_settings_diff)?;
@ -1095,21 +1097,116 @@ pub struct InnerIndexSettingsDiff {
// TODO: compare directly the embedders.
pub(crate) embedding_configs_updated: bool,
pub(crate) settings_update_only: bool,
/// The set of searchable fields that are newly added by this settings update.
/// If any other searchable field has been modified, this is set to `None`.
pub(crate) only_additional_fields: Option<HashSet<String>>,
// Cache the check to see if all the stop_words, allowed_separators, dictionary,
// exact_attributes, proximity_precision are different.
pub(crate) cache_reindex_searchable_without_user_defined: bool,
// Cache the check to see if the user_defined_searchables are different.
pub(crate) cache_user_defined_searchables: bool,
// Cache the check to see if the exact_attributes are different.
pub(crate) cache_exact_attributes: bool,
}
impl InnerIndexSettingsDiff {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::settings")]
pub(crate) fn new(
old_settings: InnerIndexSettings,
new_settings: InnerIndexSettings,
primary_key_id: Option<FieldId>,
embedding_configs_updated: bool,
settings_update_only: bool,
) -> Self {
let only_additional_fields = match (
&old_settings.user_defined_searchable_fields,
&new_settings.user_defined_searchable_fields,
) {
(None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
(Some(old), Some(new)) => {
let old: HashSet<_> = old.iter().cloned().collect();
let new: HashSet<_> = new.iter().cloned().collect();
if old.difference(&new).next().is_none() {
// if no field has been removed return only the additional ones
Some(&new - &old).filter(|x| !x.is_empty())
} else {
None
}
}
};
let cache_reindex_searchable_without_user_defined = {
old_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
!= new_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
|| old_settings.allowed_separators != new_settings.allowed_separators
|| old_settings.dictionary != new_settings.dictionary
|| old_settings.proximity_precision != new_settings.proximity_precision
};
let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
!= new_settings.user_defined_searchable_fields;
InnerIndexSettingsDiff {
old: old_settings,
new: new_settings,
primary_key_id,
embedding_configs_updated,
settings_update_only,
only_additional_fields,
cache_reindex_searchable_without_user_defined,
cache_user_defined_searchables,
cache_exact_attributes,
}
}
pub fn searchable_fields_to_index(&self) -> BTreeSet<FieldId> {
if self.settings_update_only {
self.new
.fields_ids_map
.ids()
.filter(|id| self.reindex_searchable_id(*id).is_some())
.collect()
} else {
self.new.searchable_fields_ids.iter().copied().collect()
}
}
pub fn any_reindexing_needed(&self) -> bool {
self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors()
}
pub fn reindex_searchable(&self) -> bool {
self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
!= self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
|| self.old.allowed_separators != self.new.allowed_separators
|| self.old.dictionary != self.new.dictionary
|| self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields
|| self.old.exact_attributes != self.new.exact_attributes
|| self.old.proximity_precision != self.new.proximity_precision
self.cache_reindex_searchable_without_user_defined
|| self.cache_exact_attributes
|| self.cache_user_defined_searchables
}
pub fn reindex_proximities(&self) -> bool {
// if any searchable settings force the reindexing
(self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables)
// and if any setting needs the proximity database to be created
&& (self.old.proximity_precision == ProximityPrecision::ByAttribute
|| self.new.proximity_precision == ProximityPrecision::ByAttribute)
}
pub fn reindex_searchable_id(&self, id: FieldId) -> Option<DelAddOperation> {
if self.cache_reindex_searchable_without_user_defined || self.cache_exact_attributes {
Some(DelAddOperation::DeletionAndAddition)
} else if let Some(only_additional_fields) = &self.only_additional_fields {
let additional_field = self.new.fields_ids_map.name(id).unwrap();
if only_additional_fields.contains(additional_field) {
Some(DelAddOperation::Addition)
} else {
None
}
} else if self.cache_user_defined_searchables {
Some(DelAddOperation::DeletionAndAddition)
} else {
None
}
}
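
For intuition on `only_additional_fields` and how it feeds `reindex_searchable_id`: when the new searchable list is a strict superset of the old one, only the added fields need indexing, and only on the Addition side. A self-contained sketch (hypothetical standalone `only_additional_fields` function mirroring the constructor logic above; field names are invented for the example):

```rust
use std::collections::HashSet;

// Hypothetical standalone version of the rule used in `InnerIndexSettingsDiff::new`.
fn only_additional_fields(
    old: Option<&[&str]>,
    new: Option<&[&str]>,
) -> Option<HashSet<String>> {
    match (old, new) {
        // `None` means "*": every field is searchable, so nothing is "additional".
        (None, _) | (_, None) => None,
        (Some(old), Some(new)) => {
            let old: HashSet<String> = old.iter().map(|s| s.to_string()).collect();
            let new: HashSet<String> = new.iter().map(|s| s.to_string()).collect();
            if old.difference(&new).next().is_none() {
                // No field was removed: keep only the newly added ones.
                Some(&new - &old).filter(|added| !added.is_empty())
            } else {
                None
            }
        }
    }
}

fn main() {
    // ["title"] -> ["title", "overview"]: only "overview" has to be indexed,
    // and only on the Addition side; the other fields are skipped entirely.
    let added = only_additional_fields(Some(&["title"]), Some(&["title", "overview"]));
    assert_eq!(added, Some(HashSet::from(["overview".to_string()])));

    // ["title"] -> ["overview"]: a field was removed, so a full
    // DeletionAndAddition reindex of the searchable fields is needed.
    assert_eq!(only_additional_fields(Some(&["title"]), Some(&["overview"])), None);
}
```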
pub fn reindex_facets(&self) -> bool {
@ -1142,6 +1239,11 @@ impl InnerIndexSettingsDiff {
self.settings_update_only
}
pub fn run_geo_indexing(&self) -> bool {
self.old.geo_fields_ids != self.new.geo_fields_ids
|| (!self.settings_update_only && self.new.geo_fields_ids.is_some())
}
pub fn modified_faceted_fields(&self) -> HashSet<String> {
&self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields
}
@ -1161,6 +1263,7 @@ pub(crate) struct InnerIndexSettings {
pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs,
pub existing_fields: HashSet<String>,
pub geo_fields_ids: Option<(FieldId, FieldId)>,
}
impl InnerIndexSettings {
@ -1169,7 +1272,7 @@ impl InnerIndexSettings {
let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
let allowed_separators = index.allowed_separators(rtxn)?;
let dictionary = index.dictionary(rtxn)?;
let fields_ids_map = index.fields_ids_map(rtxn)?;
let mut fields_ids_map = index.fields_ids_map(rtxn)?;
let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
let user_defined_searchable_fields =
user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
@ -1184,6 +1287,24 @@ impl InnerIndexSettings {
.into_iter()
.filter_map(|(field, count)| (count != 0).then_some(field))
.collect();
// index.fields_ids_map($a)? ==>> fields_ids_map
let geo_fields_ids = match fields_ids_map.id("_geo") {
Some(gfid) => {
let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
// if `_geo` is faceted then we get the `lat` and `lng`
if is_sortable || is_filterable {
let field_ids = fields_ids_map
.insert("_geo.lat")
.zip(fields_ids_map.insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
Some(field_ids)
} else {
None
}
}
None => None,
};
Ok(Self {
stop_words,
@ -1198,6 +1319,7 @@ impl InnerIndexSettings {
proximity_precision,
embedding_configs,
existing_fields,
geo_fields_ids,
})
}
@ -1555,7 +1677,7 @@ mod tests {
// When we search for something that is not in
// the searchable fields it must not return any document.
let result = index.search(&rtxn).query("23").execute().unwrap();
assert!(result.documents_ids.is_empty());
assert_eq!(result.documents_ids, Vec::<u32>::new());
// When we search for something that is in the searchable fields
// we must find the appropriate document.

View File

@ -159,6 +159,7 @@ pub fn expected_order(
match optional_words {
TermsMatchingStrategy::Last => groups.into_iter().flatten().collect(),
TermsMatchingStrategy::Frequency => groups.into_iter().flatten().collect(),
TermsMatchingStrategy::All => {
groups.into_iter().flatten().filter(|d| d.word_rank == 0).collect()
}